/*
 * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends AMD64BaseAssembler {

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }
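    // Usage sketch (added for illustration, not part of the upstream file): the numeric value of a
    // ConditionFlag is the condition-code nibble of the corresponding Jcc/SETcc/CMOVcc opcode, and
    // negate() flips its lowest bit (Less 0xc <-> GreaterEqual 0xd, Zero 0x4 <-> NotZero 0x5, ...).
    // Assuming an AMD64Assembler 'asm' and a Label 'fallThrough' are in scope:
    //
    //     ConditionFlag cond = ConditionFlag.Less;     // cc = 0xc
    //     asm.jcc(cond.negate(), fallThrough);         // emits JGE (cc = 0xd), branching around
    //                                                  // code that should only run when cond holds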

    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
        WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
        WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
        QwordAssertion(CPU, CPU, QWORD),
        FloatAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
        FloatToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }

    }

    protected static final int P_0F = 0x0F;
    protected static final int P_0F38 = 0x380F;
    protected static final int P_0F3A = 0x3A0F;

    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.getSizePrefix() != 0) {
                asm.emitByte(size.getSizePrefix());
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        public OperandSize[] getAllowedSizes() {
            return assertion.allowedSizes;
        }

        protected final boolean isSSEInstruction() {
            if (feature == null) {
                return false;
            }
            switch (feature) {
                case SSE:
                case SSE2:
                case SSE3:
                case SSSE3:
                case SSE4A:
                case SSE4_1:
                case SSE4_2:
                    return true;
                default:
                    return false;
            }
        }

        public final OpAssertion getAssertion() {
            return assertion;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            this(opcode, immIsByte, prefix, op, assertion, null);
        }

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, 0, prefix, op, assertion, feature);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.getBytes();
            }
        }
    }

    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
        public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
        public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
        // POPCNT, TZCNT, and LZCNT support word operation. However, the legacy size prefix should
        // be emitted before the mandatory prefix 0xF3. Since we are not emitting bit count for
        // 16-bit operands, here we simply use DwordOrLargerAssertion.
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, OpAssertion.DwordOrLargerAssertion, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, OpAssertion.DwordOrLargerAssertion, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, OpAssertion.DwordOrLargerAssertion, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordAssertion);
        public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);
        public static final AMD64RMOp CMP = new AMD64RMOp("CMP", 0x3B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);

        // TEST is documented as an MR operation, but it's symmetric, and using it as an RM operation is more convenient.
        public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x10:
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
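    // Illustrative sketch (added; not in the upstream file): RM ops read from the right-hand
    // operand into the register on the left. Assuming 'asm' and JVMCI registers
    // (jdk.vm.ci.amd64.AMD64.rax, rbx, rsp) are in scope:
    //
    //     AMD64RMOp.MOV.emit(asm, OperandSize.QWORD, rax, new AMD64Address(rsp, 16));  // load
    //     AMD64RMOp.CMP.emit(asm, OperandSize.DWORD, rax, rbx);                        // compare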

    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);

        // MOVD and MOVQ are the same opcode, just with different operand size prefix
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatAssertion must be used.
        public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x11:
                        if ((size == SS) || (size == SD)) {
                            nds = src;
                        }
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(src, nds, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(src, dst);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, src, null);
            if (isSSEInstruction()) {
                asm.simdPrefix(src, Register.None, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
            }
            asm.emitOperandHelper(src, dst, 0);
        }
    }

    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
        public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
        public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
        public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP = new AMD64MOp("POP", 0x8F, 0, OpAssertion.WordOrDwordAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }
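    // Illustrative sketch (added): M ops encode a single operand plus the /ext digit in the ModRM
    // reg field. For example, negating a 64-bit register and incrementing a memory dword
    // (assuming 'asm' and JVMCI registers in scope):
    //
    //     AMD64MOp.NEG.emit(asm, OperandSize.QWORD, rax);                  // NEG rax
    //     AMD64MOp.INC.emit(asm, OperandSize.DWORD, new AMD64Address(rbx)); // INC dword [rbx]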

    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm, boolean annotateImm) {
            assert verify(asm, size, dst, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm, boolean annotateImm) {
            assert verify(asm, size, null, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }
    }
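    // Illustrative sketch (added): MI ops take an immediate whose width is either one byte
    // (immIsByte) or the operand size. With annotateImm set, the assembler reports the immediate's
    // position so it can be patched later. Assuming 'asm' in scope:
    //
    //     AMD64MIOp.MOV.emit(asm, OperandSize.DWORD, rax, 0x1234);       // MOV eax, 0x1234
    //     AMD64MIOp.TEST.emit(asm, OperandSize.QWORD, rax, 0xFF, true);  // patchable imm32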

    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of round, as the operation is always treated with a single variant
     * input, making its extension to 3-address forms redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion, null);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, immIsByte, prefix, op, assertion, feature);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
            }
            asm.emitOperandHelper(dst, src, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }
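    // Illustrative sketch (added): RMI ops combine an RM operand pair with a trailing immediate.
    // IMUL_SX uses the sign-extended imm8 encoding (0x6B), IMUL the full-width immediate (0x69).
    // Assuming 'asm' in scope:
    //
    //     AMD64RMIOp.IMUL_SX.emit(asm, OperandSize.QWORD, rax, rax, 10);   // rax = rax * 10
    //     AMD64RMIOp.IMUL.emit(asm, OperandSize.DWORD, rcx, rdx, 100000);  // imm32 variant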

    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
        public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
        public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
        public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
        public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
        public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
        public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
        public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }

    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }
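    // Illustrative sketch (added): AMD64BinaryArithmetic bundles the byte/imm8/imm32 encodings of
    // one ALU operation; getMIOpcode picks the shortest immediate form. Assuming 'asm' and an int
    // 'imm' in scope:
    //
    //     AMD64MIOp op = AMD64BinaryArithmetic.ADD.getMIOpcode(OperandSize.QWORD, isByte(imm));
    //     op.emit(asm, OperandSize.QWORD, rax, imm);  // 0x83 /0 (sign-extended imm8) when it fits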

    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }
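    // Illustrative sketch (added): each shift comes in three encodings: shift by 1 (m1Op), by CL
    // (mcOp), and by imm8 (miOp). Assuming 'asm' in scope:
    //
    //     AMD64Shift.SHL.miOp.emit(asm, OperandSize.QWORD, rax, 3);  // rax <<= 3
    //     AMD64Shift.SAR.mcOp.emit(asm, OperandSize.DWORD, rdx);     // edx >>= cl (arithmetic)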

    private enum VEXOpAssertion {
        AVX1(CPUFeature.AVX, CPUFeature.AVX),
        AVX1_2(CPUFeature.AVX, CPUFeature.AVX2),
        AVX2(CPUFeature.AVX2, CPUFeature.AVX2),
        AVX1_128ONLY(CPUFeature.AVX, null),
        AVX1_256ONLY(null, CPUFeature.AVX),
        AVX2_256ONLY(null, CPUFeature.AVX2),
        XMM_CPU(CPUFeature.AVX, null, XMM, null, CPU, null),
        XMM_XMM_CPU(CPUFeature.AVX, null, XMM, XMM, CPU, null),
        CPU_XMM(CPUFeature.AVX, null, CPU, null, XMM, null),
        AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, CPU, null, XMM, null),
        BMI1(CPUFeature.BMI1, null, CPU, CPU, CPU, null),
        BMI2(CPUFeature.BMI2, null, CPU, CPU, CPU, null),
        FMA(CPUFeature.FMA, null, XMM, XMM, XMM, null);

        private final CPUFeature l128feature;
        private final CPUFeature l256feature;

        private final RegisterCategory rCategory;
        private final RegisterCategory vCategory;
        private final RegisterCategory mCategory;
        private final RegisterCategory imm8Category;

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature) {
            this(l128feature, l256feature, XMM, XMM, XMM, XMM);
        }

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory, RegisterCategory imm8Category) {
            this.l128feature = l128feature;
            this.l256feature = l256feature;
            this.rCategory = rCategory;
            this.vCategory = vCategory;
            this.mCategory = mCategory;
            this.imm8Category = imm8Category;
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
            return check(arch, getLFlag(size), r, v, m, null);
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
            return check(arch, getLFlag(size), r, v, m, imm8);
        }

        public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
            switch (l) {
                case L128:
                    assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
                    break;
                case L256:
                    assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";
                    break;
            }
            if (r != null) {
                assert r.getRegisterCategory().equals(rCategory);
            }
            if (v != null) {
                assert v.getRegisterCategory().equals(vCategory);
            }
            if (m != null) {
                assert m.getRegisterCategory().equals(mCategory);
            }
            if (imm8 != null) {
                assert imm8.getRegisterCategory().equals(imm8Category);
            }
            return true;
        }

        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
            switch (avxSize) {
                case XMM:
                    return l128feature != null && features.contains(l128feature);
                case YMM:
                    return l256feature != null && features.contains(l256feature);
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }
    }

    /**
     * Base class for VEX-encoded instructions.
     */
    public static class VexOp {
        protected final int pp;
        protected final int mmmmm;
        protected final int w;
        protected final int op;

        private final String opcode;
        protected final VEXOpAssertion assertion;

        protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            this.pp = pp;
            this.mmmmm = mmmmm;
            this.w = w;
            this.op = op;
            this.opcode = opcode;
            this.assertion = assertion;
        }

        public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
            return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size);
        }

        @Override
        public String toString() {
            return opcode;
        }
    }
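    // Note (added): pp, mmmmm and w mirror the identically named VEX prefix fields: pp selects the
    // implied SIMD prefix (none/66/F2/F3), mmmmm the opcode map (0F/0F38/0F3A), and w the
    // operand-width bit; op is the opcode byte proper.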

    /**
     * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
     */
    public static class VexRROp extends VexOp {
        // @formatter:off
        public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
        // @formatter:on

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            // the 128-bit broadcasts (0x1A/0x5A in the 0F38 map) only accept a memory source
            assert op != 0x1A && op != 0x5A;
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RM.
     */
    public static class VexRMOp extends VexRROp {
        // @formatter:off
        public static final VexRMOp VCVTTSS2SI = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSS2SQ = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SI = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SQ = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTPS2PD = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTPD2PS = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTDQ2PS = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPS2DQ = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPD2DQ = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
        public static final VexRMOp VCVTDQ2PD = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
        public static final VexRMOp VBROADCASTSS = new VexRMOp("VBROADCASTSS", P_66, M_0F38, W0, 0x18);
        public static final VexRMOp VBROADCASTSD = new VexRMOp("VBROADCASTSD", P_66, M_0F38, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VBROADCASTF128 = new VexRMOp("VBROADCASTF128", P_66, M_0F38, W0, 0x1A, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VPBROADCASTI128 = new VexRMOp("VPBROADCASTI128", P_66, M_0F38, W0, 0x5A, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMOp VPBROADCASTB = new VexRMOp("VPBROADCASTB", P_66, M_0F38, W0, 0x78, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTW = new VexRMOp("VPBROADCASTW", P_66, M_0F38, W0, 0x79, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTD = new VexRMOp("VPBROADCASTD", P_66, M_0F38, W0, 0x58, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTQ = new VexRMOp("VPBROADCASTQ", P_66, M_0F38, W0, 0x59, VEXOpAssertion.AVX2);
        public static final VexRMOp VPMOVMSKB = new VexRMOp("VPMOVMSKB", P_66, M_0F, WIG, 0xD7, VEXOpAssertion.AVX1_2_CPU_XMM);
        public static final VexRMOp VPMOVSXBW = new VexRMOp("VPMOVSXBW", P_66, M_0F38, WIG, 0x20);
        public static final VexRMOp VPMOVSXBD = new VexRMOp("VPMOVSXBD", P_66, M_0F38, WIG, 0x21);
        public static final VexRMOp VPMOVSXBQ = new VexRMOp("VPMOVSXBQ", P_66, M_0F38, WIG, 0x22);
        public static final VexRMOp VPMOVSXWD = new VexRMOp("VPMOVSXWD", P_66, M_0F38, WIG, 0x23);
        public static final VexRMOp VPMOVSXWQ = new VexRMOp("VPMOVSXWQ", P_66, M_0F38, WIG, 0x24);
        public static final VexRMOp VPMOVSXDQ = new VexRMOp("VPMOVSXDQ", P_66, M_0F38, WIG, 0x25);
        public static final VexRMOp VPMOVZXBW = new VexRMOp("VPMOVZXBW", P_66, M_0F38, WIG, 0x30);
        public static final VexRMOp VPMOVZXBD = new VexRMOp("VPMOVZXBD", P_66, M_0F38, WIG, 0x31);
        public static final VexRMOp VPMOVZXBQ = new VexRMOp("VPMOVZXBQ", P_66, M_0F38, WIG, 0x32);
        public static final VexRMOp VPMOVZXWD = new VexRMOp("VPMOVZXWD", P_66, M_0F38, WIG, 0x33);
        public static final VexRMOp VPMOVZXWQ = new VexRMOp("VPMOVZXWQ", P_66, M_0F38, WIG, 0x34);
        public static final VexRMOp VPMOVZXDQ = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
        public static final VexRMOp VPTEST = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
        public static final VexRMOp VSQRTPD = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTPS = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSD = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSS = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
        public static final VexRMOp VUCOMISS = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
        public static final VexRMOp VUCOMISD = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
        // @formatter:on

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }

    /**
     * VEX-encoded move instructions.
     * <p>
     * These instructions have two opcodes: op is the forward move instruction with an operand
     * order of RM, and opReverse is the reverse move instruction with an operand order of MR.
     */
    public static final class VexMoveOp extends VexRMOp {
        // @formatter:off
        public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSS = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSD = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVD = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        public static final VexMoveOp VMOVQ = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private final int opReverse;

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }

        public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitModRM(src, dst);
        }
    }
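    // Illustrative sketch (added): VexMoveOp selects the RM opcode for loads and the MR opcode
    // (opReverse) for stores, so one constant covers both directions. Assuming 'asm' and XMM
    // registers (e.g. jdk.vm.ci.amd64.AMD64.xmm0) in scope:
    //
    //     VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, xmm0, new AMD64Address(rsi));  // load
    //     VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, new AMD64Address(rdi), xmm0);  // store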

    public interface VexRRIOp {
        void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
    }

    /**
     * VEX-encoded instructions with an operand order of RMI.
     */
    public static final class VexRMIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexRMIOp VPERMQ = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFD = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 1);
            asm.emitByte(imm8);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of MRI.
     */
    public static final class VexMRIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
        public static final VexMRIOp VPEXTRB = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRW = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRD = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRQ = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(src, dst);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(src, dst, 1);
            asm.emitByte(imm8);
        }
    }
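    // Illustrative sketch (added): MRI ops write to the first (M) operand and take an imm8
    // selector, e.g. extracting the upper 128-bit lane of a YMM register (assuming 'asm',
    // 'xmm0' and 'xmm1' in scope):
    //
    //     VexMRIOp.VEXTRACTF128.emit(asm, AVXSize.YMM, xmm0, xmm1, 1);  // xmm0 = upper lane of ymm1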

    /**
     * VEX-encoded instructions with an operand order of RVMR.
     */
    public static class VexRVMROp extends VexOp {
        // @formatter:off
        public static final VexRVMROp VPBLENDVB = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
        public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
        public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
        // @formatter:on

        protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(mask.encoding() << 4);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
            asm.emitByte(mask.encoding() << 4);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RVM.
     */
    public static class VexRVMOp extends VexOp {
        // @formatter:off
        public static final VexRVMOp VANDPS = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDPD = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDNPS = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
        public static final VexRVMOp VANDNPD = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
        public static final VexRVMOp VORPS = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
        public static final VexRVMOp VORPD = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
        public static final VexRVMOp VXORPS = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
        public static final VexRVMOp VXORPD = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
        public static final VexRVMOp VADDPS = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDPD = new VexRVMOp("VADDPD", P_66, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSS = new VexRVMOp("VADDSS", P_F3, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSD = new VexRVMOp("VADDSD", P_F2, M_0F, WIG, 0x58);
        public static final VexRVMOp VMULPS = new VexRVMOp("VMULPS", P_, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULPD = new VexRVMOp("VMULPD", P_66, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSS = new VexRVMOp("VMULSS", P_F3, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSD = new VexRVMOp("VMULSD", P_F2, M_0F, WIG, 0x59);
        public static final VexRVMOp VSUBPS = new VexRVMOp("VSUBPS", P_, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBPD = new VexRVMOp("VSUBPD", P_66, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSS = new VexRVMOp("VSUBSS", P_F3, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSD = new VexRVMOp("VSUBSD", P_F2, M_0F, WIG, 0x5C);
        public static final VexRVMOp VMINPS = new VexRVMOp("VMINPS", P_, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINPD = new VexRVMOp("VMINPD", P_66, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSS = new VexRVMOp("VMINSS", P_F3, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSD = new VexRVMOp("VMINSD", P_F2, M_0F, WIG, 0x5D);
        public static final VexRVMOp VDIVPS = new VexRVMOp("VDIVPS", P_, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVPD = new VexRVMOp("VDIVPD", P_66, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSS = new VexRVMOp("VDIVSS", P_F3, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSD = new VexRVMOp("VDIVSD", P_F2, M_0F, WIG, 0x5E);
        public static final VexRVMOp VMAXPS = new VexRVMOp("VMAXPS", P_, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXPD = new VexRVMOp("VMAXPD", P_66, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSS = new VexRVMOp("VMAXSS", P_F3, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSD = new VexRVMOp("VMAXSD", P_F2, M_0F, WIG, 0x5F);
        public static final VexRVMOp VADDSUBPS = new VexRVMOp("VADDSUBPS", P_F2, M_0F, WIG, 0xD0);
        public static final VexRVMOp VADDSUBPD = new VexRVMOp("VADDSUBPD", P_66, M_0F, WIG, 0xD0);
        public static final VexRVMOp VPAND = new VexRVMOp("VPAND", P_66, M_0F, WIG, 0xDB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPOR = new VexRVMOp("VPOR", P_66, M_0F, WIG, 0xEB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPXOR = new VexRVMOp("VPXOR", P_66, M_0F, WIG, 0xEF, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDB = new VexRVMOp("VPADDB", P_66, M_0F, WIG, 0xFC, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDW = new VexRVMOp("VPADDW", P_66, M_0F, WIG, 0xFD, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDD = new VexRVMOp("VPADDD", P_66, M_0F, WIG, 0xFE, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDQ = new VexRVMOp("VPADDQ", P_66, M_0F, WIG, 0xD4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHUW = new VexRVMOp("VPMULHUW", P_66, M_0F, WIG, 0xE4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHW = new VexRVMOp("VPMULHW", P_66, M_0F, WIG, 0xE5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLW = new VexRVMOp("VPMULLW", P_66, M_0F, WIG, 0xD5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLD = new VexRVMOp("VPMULLD", P_66, M_0F38, WIG, 0x40, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBB = new VexRVMOp("VPSUBB", P_66, M_0F, WIG, 0xF8, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBW = new VexRVMOp("VPSUBW", P_66, M_0F, WIG, 0xF9, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBD = new VexRVMOp("VPSUBD", P_66, M_0F, WIG, 0xFA, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBQ = new VexRVMOp("VPSUBQ", P_66, M_0F, WIG, 0xFB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSHUFB = new VexRVMOp("VPSHUFB", P_66, M_0F38, WIG, 0x00, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VCVTSD2SS = new VexRVMOp("VCVTSD2SS", P_F2, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSS2SD = new VexRVMOp("VCVTSS2SD", P_F3, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSI2SD = new VexRVMOp("VCVTSI2SD", P_F2, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SD = new VexRVMOp("VCVTSQ2SD", P_F2, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSI2SS = new VexRVMOp("VCVTSI2SS", P_F3, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SS = new VexRVMOp("VCVTSQ2SS", P_F3, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VPCMPEQB = new VexRVMOp("VPCMPEQB", P_66, M_0F, WIG, 0x74, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQW = new VexRVMOp("VPCMPEQW", P_66, M_0F, WIG, 0x75, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VFMADD231SS = new VexRVMOp("VFMADD231SS", P_66, M_0F38, W0, 0xB9, VEXOpAssertion.FMA);
        public static final VexRVMOp VFMADD231SD = new VexRVMOp("VFMADD231SD", P_66, M_0F38, W1, 0xB9, VEXOpAssertion.FMA);
        // @formatter:on

        private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
        }
    }
W0 : W1, false); 1356 asm.emitByte(op); 1357 asm.emitModRM(dst, src2); 1358 } 1359 1360 @Override 1361 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1362 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null); 1363 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1364 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1365 asm.emitByte(op); 1366 asm.emitOperandHelper(dst, src2, 0); 1367 } 1368 } 1369 1370 public static final class VexGeneralPurposeRMVOp extends VexOp { 1371 // @formatter:off 1372 public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1); 1373 public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1374 public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1375 public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1376 public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1377 // @formatter:on 1378 1379 private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1380 super(opcode, pp, mmmmm, w, op, assertion); 1381 } 1382 1383 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1384 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null); 1385 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1386 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1387 asm.emitByte(op); 1388 asm.emitModRM(dst, src1); 1389 } 1390 1391 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) { 1392 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null); 1393 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1394 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1395 asm.emitByte(op); 1396 asm.emitOperandHelper(dst, src1, 0); 1397 } 1398 } 1399 1400 public static final class VexGeneralPurposeRMOp extends VexRMOp { 1401 // @formatter:off 1402 public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1); 1403 public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1); 1404 public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1); 1405 // @formatter:on 1406 private final int ext; 1407 1408 private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) { 1409 super(opcode, pp, mmmmm, w, op, assertion); 1410 this.ext = ext; 1411 } 1412 1413 @Override 1414 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) { 1415 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1416 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? 
W0 : W1, false); 1417 asm.emitByte(op); 1418 asm.emitModRM(ext, src); 1419 } 1420 1421 @Override 1422 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) { 1423 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1424 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1425 asm.emitByte(op); 1426 asm.emitOperandHelper(ext, src, 0); 1427 } 1428 } 1429 1430 /** 1431 * VEX-encoded shift instructions with an operand order of either RVM or VMI. 1432 */ 1433 public static final class VexShiftOp extends VexRVMOp implements VexRRIOp { 1434 // @formatter:off 1435 public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2); 1436 public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2); 1437 public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2); 1438 public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4); 1439 public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4); 1440 public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6); 1441 public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6); 1442 public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6); 1443 // @formatter:on 1444 1445 private final int immOp; 1446 private final int r; 1447 1448 private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) { 1449 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2); 1450 this.immOp = immOp; 1451 this.r = r; 1452 } 1453 1454 @Override 1455 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { 1456 assert assertion.check((AMD64) asm.target.arch, size, null, dst, src); 1457 asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false); 1458 asm.emitByte(immOp); 1459 asm.emitModRM(r, src); 1460 asm.emitByte(imm8); 1461 } 1462 } 1463 1464 public static final class VexMaskMoveOp extends VexOp { 1465 // @formatter:off 1466 public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E); 1467 public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F); 1468 public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1469 public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1470 // @formatter:on 1471 1472 private final int opReverse; 1473 1474 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) { 1475 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1); 1476 } 1477 1478 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) { 1479 super(opcode, pp, mmmmm, w, op, assertion); 1480 this.opReverse = opReverse; 1481 } 1482 1483 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) { 1484 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null); 1485 asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false); 1486 asm.emitByte(op); 1487 asm.emitOperandHelper(dst, src, 0); 1488 } 1489 1490 public void emit(AMD64Assembler asm, AVXSize size, 
AMD64Address dst, Register mask, Register src) { 1491 assert assertion.check((AMD64) asm.target.arch, size, src, mask, null); 1492 asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false); 1493 asm.emitByte(opReverse); 1494 asm.emitOperandHelper(src, dst, 0); 1495 } 1496 } 1497 1498 /** 1499 * VEX-encoded instructions with an operand order of RVMI. 1500 */ 1501 public static final class VexRVMIOp extends VexOp { 1502 // @formatter:off 1503 public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6); 1504 public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6); 1505 public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY); 1506 public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY); 1507 // @formatter:on 1508 1509 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) { 1510 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1511 } 1512 1513 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1514 super(opcode, pp, mmmmm, w, op, assertion); 1515 } 1516 1517 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) { 1518 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1519 assert (imm8 & 0xFF) == imm8; 1520 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1521 asm.emitByte(op); 1522 asm.emitModRM(dst, src2); 1523 asm.emitByte(imm8); 1524 } 1525 1526 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) { 1527 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1528 assert (imm8 & 0xFF) == imm8; 1529 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1530 asm.emitByte(op); 1531 asm.emitOperandHelper(dst, src2, 1); 1532 asm.emitByte(imm8); 1533 } 1534 } 1535 1536 /** 1537 * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a 1538 * comparison operator. 
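 * For example (an illustrative note, not from the original docs): emitting VCMPPS with
 * {@code Predicate.LT_OS} produces a per-element mask in the destination, all ones where
 * src1 < src2 and all zeros elsewhere; the O/U and S/Q suffixes of each predicate select
 * its result for unordered (NaN) operands and whether it signals on quiet NaNs.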
1539 */ 1540 public static final class VexFloatCompareOp extends VexOp { 1541 // @formatter:off 1542 public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2); 1543 public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2); 1544 public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2); 1545 public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2); 1546 // @formatter:on 1547 1548 public enum Predicate { 1549 EQ_OQ(0x00), 1550 LT_OS(0x01), 1551 LE_OS(0x02), 1552 UNORD_Q(0x03), 1553 NEQ_UQ(0x04), 1554 NLT_US(0x05), 1555 NLE_US(0x06), 1556 ORD_Q(0x07), 1557 EQ_UQ(0x08), 1558 NGE_US(0x09), 1559 NGT_US(0x0a), 1560 FALSE_OQ(0x0b), 1561 NEQ_OQ(0x0c), 1562 GE_OS(0x0d), 1563 GT_OS(0x0e), 1564 TRUE_UQ(0x0f), 1565 EQ_OS(0x10), 1566 LT_OQ(0x11), 1567 LE_OQ(0x12), 1568 UNORD_S(0x13), 1569 NEQ_US(0x14), 1570 NLT_UQ(0x15), 1571 NLE_UQ(0x16), 1572 ORD_S(0x17), 1573 EQ_US(0x18), 1574 NGE_UQ(0x19), 1575 NGT_UQ(0x1a), 1576 FALSE_OS(0x1b), 1577 NEQ_OS(0x1c), 1578 GE_OQ(0x1d), 1579 GT_OQ(0x1e), 1580 TRUE_US(0x1f); 1581 1582 private int imm8; 1583 1584 Predicate(int imm8) { 1585 this.imm8 = imm8; 1586 } 1587 1588 public static Predicate getPredicate(Condition condition, boolean unorderedIsTrue) { 1589 if (unorderedIsTrue) { 1590 switch (condition) { 1591 case EQ: 1592 return EQ_UQ; 1593 case NE: 1594 return NEQ_UQ; 1595 case LT: 1596 return NGE_UQ; 1597 case LE: 1598 return NGT_UQ; 1599 case GT: 1600 return NLE_UQ; 1601 case GE: 1602 return NLT_UQ; 1603 default: 1604 throw GraalError.shouldNotReachHere(); 1605 } 1606 } else { 1607 switch (condition) { 1608 case EQ: 1609 return EQ_OQ; 1610 case NE: 1611 return NEQ_OQ; 1612 case LT: 1613 return LT_OQ; 1614 case LE: 1615 return LE_OQ; 1616 case GT: 1617 return GT_OQ; 1618 case GE: 1619 return GE_OQ; 1620 default: 1621 throw GraalError.shouldNotReachHere(); 1622 } 1623 } 1624 } 1625 } 1626 1627 private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) { 1628 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1629 } 1630 1631 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) { 1632 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1633 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1634 asm.emitByte(op); 1635 asm.emitModRM(dst, src2); 1636 asm.emitByte(p.imm8); 1637 } 1638 1639 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) { 1640 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1641 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1642 asm.emitByte(op); 1643 asm.emitOperandHelper(dst, src2, 1); 1644 asm.emitByte(p.imm8); 1645 } 1646 } 1647 1648 public final void addl(AMD64Address dst, int imm32) { 1649 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1650 } 1651 1652 public final void addl(Register dst, int imm32) { 1653 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1654 } 1655 1656 public final void addl(Register dst, Register src) { 1657 ADD.rmOp.emit(this, DWORD, dst, src); 1658 } 1659 1660 public final void addpd(Register dst, Register src) { 1661 SSEOp.ADD.emit(this, PD, dst, src); 1662 } 1663 1664 public final void addpd(Register dst, AMD64Address src) { 1665 SSEOp.ADD.emit(this, PD, dst, src); 1666 } 1667 1668 public final void
addsd(Register dst, Register src) { 1669 SSEOp.ADD.emit(this, SD, dst, src); 1670 } 1671 1672 public final void addsd(Register dst, AMD64Address src) { 1673 SSEOp.ADD.emit(this, SD, dst, src); 1674 } 1675 1676 private void addrNop4() { 1677 // 4 bytes: NOP DWORD PTR [EAX+0] 1678 emitByte(0x0F); 1679 emitByte(0x1F); 1680 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc); 1681 emitByte(0); // 8-bit offset (1 byte) 1682 } 1683 1684 private void addrNop5() { 1685 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset 1686 emitByte(0x0F); 1687 emitByte(0x1F); 1688 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); 1689 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1690 emitByte(0); // 8-bit offset (1 byte) 1691 } 1692 1693 private void addrNop7() { 1694 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset 1695 emitByte(0x0F); 1696 emitByte(0x1F); 1697 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); 1698 emitInt(0); // 32-bit offset (4 bytes) 1699 } 1700 1701 private void addrNop8() { 1702 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset 1703 emitByte(0x0F); 1704 emitByte(0x1F); 1705 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); 1706 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1707 emitInt(0); // 32-bit offset (4 bytes) 1708 } 1709 1710 public final void andl(Register dst, int imm32) { 1711 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1712 } 1713 1714 public final void andl(Register dst, Register src) { 1715 AND.rmOp.emit(this, DWORD, dst, src); 1716 } 1717 1718 public final void andpd(Register dst, Register src) { 1719 SSEOp.AND.emit(this, PD, dst, src); 1720 } 1721 1722 public final void andpd(Register dst, AMD64Address src) { 1723 SSEOp.AND.emit(this, PD, dst, src); 1724 } 1725 1726 public final void bsfq(Register dst, Register src) { 1727 prefixq(dst, src); 1728 emitByte(0x0F); 1729 emitByte(0xBC); 1730 emitModRM(dst, src); 1731 } 1732 1733 public final void bsrl(Register dst, Register src) { 1734 prefix(dst, src); 1735 emitByte(0x0F); 1736 emitByte(0xBD); 1737 emitModRM(dst, src); 1738 } 1739 1740 public final void bswapl(Register reg) { 1741 prefix(reg); 1742 emitByte(0x0F); 1743 emitModRM(1, reg); 1744 } 1745 1746 public final void cdql() { 1747 emitByte(0x99); 1748 } 1749 1750 public final void cmovl(ConditionFlag cc, Register dst, Register src) { 1751 prefix(dst, src); 1752 emitByte(0x0F); 1753 emitByte(0x40 | cc.getValue()); 1754 emitModRM(dst, src); 1755 } 1756 1757 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) { 1758 prefix(src, dst); 1759 emitByte(0x0F); 1760 emitByte(0x40 | cc.getValue()); 1761 emitOperandHelper(dst, src, 0); 1762 } 1763 1764 public final void cmpb(Register dst, Register src) { 1765 CMP.byteRmOp.emit(this, BYTE, dst, src); 1766 } 1767 1768 public final void cmpw(Register dst, Register src) { 1769 CMP.rmOp.emit(this, WORD, dst, src); 1770 } 1771 1772 public final void cmpl(Register dst, int imm32) { 1773 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1774 } 1775 1776 public final void cmpl(Register dst, Register src) { 1777 CMP.rmOp.emit(this, DWORD, dst, src); 1778 } 1779 1780 public final void cmpl(Register dst, AMD64Address src) { 1781 CMP.rmOp.emit(this, DWORD, dst, src); 1782 } 1783 1784 public final void cmpl(AMD64Address dst, int imm32) { 1785 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1786 } 1787 1788 /** 1789 * The 8-bit cmpxchg compares the value at adr with the contents of X86.rax, and stores reg into 1790 * adr if they are equal;
otherwise, the value at adr is loaded into X86.rax. The ZF is set if the compared 1791 * values were equal, and cleared otherwise. 1792 */ 1793 public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg 1794 prefixb(adr, reg); 1795 emitByte(0x0F); 1796 emitByte(0xB0); 1797 emitOperandHelper(reg, adr, 0); 1798 } 1799 1800 /** 1801 * The 16-bit cmpxchg compares the value at adr with the contents of X86.rax, and stores reg 1802 * into adr if they are equal; otherwise, the value at adr is loaded into X86.rax. The ZF is set if the 1803 * compared values were equal, and cleared otherwise. 1804 */ 1805 public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg 1806 emitByte(0x66); // Switch to 16-bit mode. 1807 prefix(adr, reg); 1808 emitByte(0x0F); 1809 emitByte(0xB1); 1810 emitOperandHelper(reg, adr, 0); 1811 } 1812 1813 /** 1814 * The 32-bit cmpxchg compares the value at adr with the contents of X86.rax, and stores reg 1815 * into adr if they are equal; otherwise, the value at adr is loaded into X86.rax. The ZF is set if the 1816 * compared values were equal, and cleared otherwise. 1817 */ 1818 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg 1819 prefix(adr, reg); 1820 emitByte(0x0F); 1821 emitByte(0xB1); 1822 emitOperandHelper(reg, adr, 0); 1823 } 1824 1825 public final void cvtsi2sdl(Register dst, Register src) { 1826 SSEOp.CVTSI2SD.emit(this, DWORD, dst, src); 1827 } 1828 1829 public final void cvttsd2sil(Register dst, Register src) { 1830 SSEOp.CVTTSD2SI.emit(this, DWORD, dst, src); 1831 } 1832 1833 public final void decl(AMD64Address dst) { 1834 prefix(dst); 1835 emitByte(0xFF); 1836 emitOperandHelper(1, dst, 0); 1837 } 1838 1839 public final void divsd(Register dst, Register src) { 1840 SSEOp.DIV.emit(this, SD, dst, src); 1841 } 1842 1843 public final void hlt() { 1844 emitByte(0xF4); 1845 } 1846 1847 public final void imull(Register dst, Register src, int value) { 1848 if (isByte(value)) { 1849 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value); 1850 } else { 1851 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value); 1852 } 1853 } 1854 1855 public final void incl(AMD64Address dst) { 1856 prefix(dst); 1857 emitByte(0xFF); 1858 emitOperandHelper(0, dst, 0); 1859 } 1860 1861 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { 1862 int shortSize = 2; 1863 int longSize = 6; 1864 long disp = jumpTarget - position(); 1865 if (!forceDisp32 && isByte(disp - shortSize)) { 1866 // 0111 tttn #8-bit disp 1867 emitByte(0x70 | cc.getValue()); 1868 emitByte((int) ((disp - shortSize) & 0xFF)); 1869 } else { 1870 // 0000 1111 1000 tttn #32-bit disp 1871 assert isInt(disp - longSize) : "must be 32-bit offset (call4)"; 1872 emitByte(0x0F); 1873 emitByte(0x80 | cc.getValue()); 1874 emitInt((int) (disp - longSize)); 1875 } 1876 } 1877 1878 public final void jcc(ConditionFlag cc, Label l) { 1879 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; 1880 if (l.isBound()) { 1881 jcc(cc, l.position(), false); 1882 } else { 1883 // Note: we could eliminate conditional jumps to this jump if the condition 1884 // is the same; however, that seems to be a rather unlikely case.
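// Note: the 32-bit displacement emitted below via emitInt(0) is only a placeholder;
// once the label is bound, patchJumpTarget rewrites it with the real displacement.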
1885 // Note: use jccb() if the label to be bound is very close, to get 1886 // an 8-bit displacement 1887 l.addPatchAt(position(), this); 1888 emitByte(0x0F); 1889 emitByte(0x80 | cc.getValue()); 1890 emitInt(0); 1891 } 1892 1893 } 1894 1895 public final void jccb(ConditionFlag cc, Label l) { 1896 if (l.isBound()) { 1897 int shortSize = 2; 1898 int entry = l.position(); 1899 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp"; 1900 long disp = entry - position(); 1901 // 0111 tttn #8-bit disp 1902 emitByte(0x70 | cc.getValue()); 1903 emitByte((int) ((disp - shortSize) & 0xFF)); 1904 } else { 1905 l.addPatchAt(position(), this); 1906 emitByte(0x70 | cc.getValue()); 1907 emitByte(0); 1908 } 1909 } 1910 1911 public final void jmp(int jumpTarget, boolean forceDisp32) { 1912 int shortSize = 2; 1913 int longSize = 5; 1914 long disp = jumpTarget - position(); 1915 if (!forceDisp32 && isByte(disp - shortSize)) { 1916 emitByte(0xEB); 1917 emitByte((int) ((disp - shortSize) & 0xFF)); 1918 } else { 1919 emitByte(0xE9); 1920 emitInt((int) (disp - longSize)); 1921 } 1922 } 1923 1924 @Override 1925 public final void jmp(Label l) { 1926 if (l.isBound()) { 1927 jmp(l.position(), false); 1928 } else { 1929 // By default, forward jumps are always 32-bit displacements, since 1930 // we can't yet know where the label will be bound. If you're sure that 1931 // the forward jump will not run beyond 256 bytes, use jmpb to 1932 // force an 8-bit displacement. 1933 1934 l.addPatchAt(position(), this); 1935 emitByte(0xE9); 1936 emitInt(0); 1937 } 1938 } 1939 1940 public final void jmp(Register entry) { 1941 prefix(entry); 1942 emitByte(0xFF); 1943 emitModRM(4, entry); 1944 } 1945 1946 public final void jmp(AMD64Address adr) { 1947 prefix(adr); 1948 emitByte(0xFF); 1949 emitOperandHelper(AMD64.rsp, adr, 0); 1950 } 1951 1952 public final void jmpb(Label l) { 1953 if (l.isBound()) { 1954 int shortSize = 2; 1955 // Displacement is relative to byte just after jmpb instruction 1956 int displacement = l.position() - position() - shortSize; 1957 GraalError.guarantee(isByte(displacement), "Displacement too large to be encoded as a byte: %d", displacement); 1958 emitByte(0xEB); 1959 emitByte(displacement & 0xFF); 1960 } else { 1961 l.addPatchAt(position(), this); 1962 emitByte(0xEB); 1963 emitByte(0); 1964 } 1965 } 1966 1967 public final void lead(Register dst, AMD64Address src) { 1968 prefix(src, dst); 1969 emitByte(0x8D); 1970 emitOperandHelper(dst, src, 0); 1971 } 1972 1973 public final void leaq(Register dst, AMD64Address src) { 1974 prefixq(src, dst); 1975 emitByte(0x8D); 1976 emitOperandHelper(dst, src, 0); 1977 } 1978 1979 public final void leave() { 1980 emitByte(0xC9); 1981 } 1982 1983 public final void lock() { 1984 emitByte(0xF0); 1985 } 1986 1987 public final void movapd(Register dst, Register src) { 1988 assert inRC(XMM, dst) && inRC(XMM, src); 1989 simdPrefix(dst, Register.None, src, PD, P_0F, false); 1990 emitByte(0x28); 1991 emitModRM(dst, src); 1992 } 1993 1994 public final void movaps(Register dst, Register src) { 1995 assert inRC(XMM, dst) && inRC(XMM, src); 1996 simdPrefix(dst, Register.None, src, PS, P_0F, false); 1997 emitByte(0x28); 1998 emitModRM(dst, src); 1999 } 2000 2001 public final void movb(AMD64Address dst, int imm8) { 2002 prefix(dst); 2003 emitByte(0xC6); 2004 emitOperandHelper(0, dst, 1); 2005 emitByte(imm8); 2006 } 2007 2008 public final void movb(AMD64Address dst, Register src) { 2009 assert inRC(CPU, src) : "must have byte register"; 2010 prefixb(dst,
src); 2011 emitByte(0x88); 2012 emitOperandHelper(src, dst, 0); 2013 } 2014 2015 public final void movl(Register dst, int imm32) { 2016 movl(dst, imm32, false); 2017 } 2018 2019 public final void movl(Register dst, int imm32, boolean annotateImm) { 2020 int insnPos = position(); 2021 prefix(dst); 2022 emitByte(0xB8 + encode(dst)); 2023 int immPos = position(); 2024 emitInt(imm32); 2025 int nextInsnPos = position(); 2026 if (annotateImm && codePatchingAnnotationConsumer != null) { 2027 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 2028 } 2029 } 2030 2031 public final void movl(Register dst, Register src) { 2032 prefix(dst, src); 2033 emitByte(0x8B); 2034 emitModRM(dst, src); 2035 } 2036 2037 public final void movl(Register dst, AMD64Address src) { 2038 prefix(src, dst); 2039 emitByte(0x8B); 2040 emitOperandHelper(dst, src, 0); 2041 } 2042 2043 /** 2044 * @param wide use 4 byte encoding for displacements that would normally fit in a byte 2045 */ 2046 public final void movl(Register dst, AMD64Address src, boolean wide) { 2047 prefix(src, dst); 2048 emitByte(0x8B); 2049 emitOperandHelper(dst, src, wide, 0); 2050 } 2051 2052 public final void movl(AMD64Address dst, int imm32) { 2053 prefix(dst); 2054 emitByte(0xC7); 2055 emitOperandHelper(0, dst, 4); 2056 emitInt(imm32); 2057 } 2058 2059 public final void movl(AMD64Address dst, Register src) { 2060 prefix(dst, src); 2061 emitByte(0x89); 2062 emitOperandHelper(src, dst, 0); 2063 } 2064 2065 /** 2066 * New CPUs require use of movsd and movss to avoid partial register stall when loading from 2067 * memory. But for old Opteron use movlpd instead of movsd. The selection is done in 2068 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and 2069 * {@link AMD64MacroAssembler#movflt(Register, Register)}. 2070 */ 2071 public final void movlpd(Register dst, AMD64Address src) { 2072 assert inRC(XMM, dst); 2073 simdPrefix(dst, dst, src, PD, P_0F, false); 2074 emitByte(0x12); 2075 emitOperandHelper(dst, src, 0); 2076 } 2077 2078 public final void movlhps(Register dst, Register src) { 2079 assert inRC(XMM, dst) && inRC(XMM, src); 2080 simdPrefix(dst, src, src, PS, P_0F, false); 2081 emitByte(0x16); 2082 emitModRM(dst, src); 2083 } 2084 2085 public final void movq(Register dst, AMD64Address src) { 2086 movq(dst, src, false); 2087 } 2088 2089 public final void movq(Register dst, AMD64Address src, boolean force4BytesDisplacement) { 2090 if (inRC(XMM, dst)) { 2091 // Insn: MOVQ xmm, r/m64 2092 // Code: F3 0F 7E /r 2093 // An alternative instruction would be 66 REX.W 0F 6E /r. We prefer the REX.W free 2094 // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction 2095 // when applicable. 2096 simdPrefix(dst, Register.None, src, SS, P_0F, false); 2097 emitByte(0x7E); 2098 emitOperandHelper(dst, src, force4BytesDisplacement, 0); 2099 } else { 2100 // gpr version of movq 2101 prefixq(src, dst); 2102 emitByte(0x8B); 2103 emitOperandHelper(dst, src, force4BytesDisplacement, 0); 2104 } 2105 } 2106 2107 public final void movq(Register dst, Register src) { 2108 assert inRC(CPU, dst) && inRC(CPU, src); 2109 prefixq(dst, src); 2110 emitByte(0x8B); 2111 emitModRM(dst, src); 2112 } 2113 2114 public final void movq(AMD64Address dst, Register src) { 2115 if (inRC(XMM, src)) { 2116 // Insn: MOVQ r/m64, xmm 2117 // Code: 66 0F D6 /r 2118 // An alternative instruction would be 66 REX.W 0F 7E /r. 
We prefer the REX.W free 2119 // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction 2120 // when applicable. 2121 simdPrefix(src, Register.None, dst, PD, P_0F, false); 2122 emitByte(0xD6); 2123 emitOperandHelper(src, dst, 0); 2124 } else { 2125 // gpr version of movq 2126 prefixq(dst, src); 2127 emitByte(0x89); 2128 emitOperandHelper(src, dst, 0); 2129 } 2130 } 2131 2132 public final void movsbl(Register dst, AMD64Address src) { 2133 prefix(src, dst); 2134 emitByte(0x0F); 2135 emitByte(0xBE); 2136 emitOperandHelper(dst, src, 0); 2137 } 2138 2139 public final void movsbl(Register dst, Register src) { 2140 prefix(dst, false, src, true); 2141 emitByte(0x0F); 2142 emitByte(0xBE); 2143 emitModRM(dst, src); 2144 } 2145 2146 public final void movsbq(Register dst, AMD64Address src) { 2147 prefixq(src, dst); 2148 emitByte(0x0F); 2149 emitByte(0xBE); 2150 emitOperandHelper(dst, src, 0); 2151 } 2152 2153 public final void movsbq(Register dst, Register src) { 2154 prefixq(dst, src); 2155 emitByte(0x0F); 2156 emitByte(0xBE); 2157 emitModRM(dst, src); 2158 } 2159 2160 public final void movsd(Register dst, Register src) { 2161 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2162 } 2163 2164 public final void movsd(Register dst, AMD64Address src) { 2165 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2166 } 2167 2168 public final void movsd(AMD64Address dst, Register src) { 2169 AMD64MROp.MOVSD.emit(this, SD, dst, src); 2170 } 2171 2172 public final void movss(Register dst, Register src) { 2173 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2174 } 2175 2176 public final void movss(Register dst, AMD64Address src) { 2177 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2178 } 2179 2180 public final void movss(AMD64Address dst, Register src) { 2181 AMD64MROp.MOVSS.emit(this, SS, dst, src); 2182 } 2183 2184 public final void mulpd(Register dst, Register src) { 2185 SSEOp.MUL.emit(this, PD, dst, src); 2186 } 2187 2188 public final void mulpd(Register dst, AMD64Address src) { 2189 SSEOp.MUL.emit(this, PD, dst, src); 2190 } 2191 2192 public final void mulsd(Register dst, Register src) { 2193 SSEOp.MUL.emit(this, SD, dst, src); 2194 } 2195 2196 public final void mulsd(Register dst, AMD64Address src) { 2197 SSEOp.MUL.emit(this, SD, dst, src); 2198 } 2199 2200 public final void mulss(Register dst, Register src) { 2201 SSEOp.MUL.emit(this, SS, dst, src); 2202 } 2203 2204 public final void movswl(Register dst, AMD64Address src) { 2205 AMD64RMOp.MOVSX.emit(this, DWORD, dst, src); 2206 } 2207 2208 public final void movswq(Register dst, AMD64Address src) { 2209 AMD64RMOp.MOVSX.emit(this, QWORD, dst, src); 2210 } 2211 2212 public final void movw(AMD64Address dst, int imm16) { 2213 emitByte(0x66); // switch to 16-bit mode 2214 prefix(dst); 2215 emitByte(0xC7); 2216 emitOperandHelper(0, dst, 2); 2217 emitShort(imm16); 2218 } 2219 2220 public final void movw(AMD64Address dst, Register src) { 2221 emitByte(0x66); 2222 prefix(dst, src); 2223 emitByte(0x89); 2224 emitOperandHelper(src, dst, 0); 2225 } 2226 2227 public final void movw(Register dst, AMD64Address src) { 2228 emitByte(0x66); 2229 prefix(src, dst); 2230 emitByte(0x8B); 2231 emitOperandHelper(dst, src, 0); 2232 } 2233 2234 public final void movzbl(Register dst, AMD64Address src) { 2235 prefix(src, dst); 2236 emitByte(0x0F); 2237 emitByte(0xB6); 2238 emitOperandHelper(dst, src, 0); 2239 } 2240 2241 public final void movzbl(Register dst, Register src) { 2242 AMD64RMOp.MOVZXB.emit(this, DWORD, dst, src); 2243 } 2244 2245 public final void movzbq(Register dst, 
Register src) { 2246 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src); 2247 } 2248 2249 public final void movzbq(Register dst, AMD64Address src) { 2250 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src); 2251 } 2252 2253 public final void movzwl(Register dst, AMD64Address src) { 2254 AMD64RMOp.MOVZX.emit(this, DWORD, dst, src); 2255 } 2256 2257 public final void movzwq(Register dst, AMD64Address src) { 2258 AMD64RMOp.MOVZX.emit(this, QWORD, dst, src); 2259 } 2260 2261 public final void negl(Register dst) { 2262 NEG.emit(this, DWORD, dst); 2263 } 2264 2265 public final void notl(Register dst) { 2266 NOT.emit(this, DWORD, dst); 2267 } 2268 2269 public final void notq(Register dst) { 2270 NOT.emit(this, QWORD, dst); 2271 } 2272 2273 @Override 2274 public final void ensureUniquePC() { 2275 nop(); 2276 } 2277 2278 public final void nop() { 2279 nop(1); 2280 } 2281 2282 public void nop(int count) { 2283 int i = count; 2284 if (UseNormalNop) { 2285 assert i > 0 : " "; 2286 // The fancy nops aren't currently recognized by debuggers, making it a 2287 // pain to disassemble code while debugging. If asserts are on, clearly 2288 // speed is not an issue, so simply use the single-byte traditional nop 2289 // to do alignment. 2290 2291 for (; i > 0; i--) { 2292 emitByte(0x90); 2293 } 2294 return; 2295 } 2296 2297 if (UseAddressNop) { 2298 // 2299 // Using multi-byte nops "0x0F 0x1F [Address]" for AMD. 2300 // 1: 0x90 2301 // 2: 0x66 0x90 2302 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2303 // 4: 0x0F 0x1F 0x40 0x00 2304 // 5: 0x0F 0x1F 0x44 0x00 0x00 2305 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2306 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2307 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2308 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2309 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2310 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2311 2312 // The rest of the encoding is AMD-specific - use consecutive address nops 2313 2314 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2315 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2316 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2317 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2318 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2319 // Size prefixes (0x66) are added for larger sizes 2320 2321 while (i >= 22) { 2322 i -= 11; 2323 emitByte(0x66); // size prefix 2324 emitByte(0x66); // size prefix 2325 emitByte(0x66); // size prefix 2326 addrNop8(); 2327 } 2328 // Generate the first nop for sizes between 21 and 12 2329 switch (i) { 2330 case 21: 2331 i -= 11; 2332 emitByte(0x66); // size prefix 2333 emitByte(0x66); // size prefix 2334 emitByte(0x66); // size prefix 2335 addrNop8(); 2336 break; 2337 case 20: 2338 case 19: 2339 i -= 10; 2340 emitByte(0x66); // size prefix 2341 emitByte(0x66); // size prefix 2342 addrNop8(); 2343 break; 2344 case 18: 2345 case 17: 2346 i -= 9; 2347 emitByte(0x66); // size prefix 2348 addrNop8(); 2349 break; 2350 case 16: 2351 case 15: 2352 i -= 8; 2353 addrNop8(); 2354 break; 2355 case 14: 2356 case 13: 2357 i -= 7; 2358 addrNop7(); 2359 break; 2360 case 12: 2361 i -= 6; 2362 emitByte(0x66); // size prefix 2363 addrNop5(); 2364 break; 2365 default: 2366 assert i < 12; 2367 } 2368 2369 // Generate the second nop for sizes between 11 and 1 2370 switch (i) { 2371 case 11: 2372 emitByte(0x66); // size prefix 2373 emitByte(0x66); // size prefix 2374
emitByte(0x66); // size prefix 2375 addrNop8(); 2376 break; 2377 case 10: 2378 emitByte(0x66); // size prefix 2379 emitByte(0x66); // size prefix 2380 addrNop8(); 2381 break; 2382 case 9: 2383 emitByte(0x66); // size prefix 2384 addrNop8(); 2385 break; 2386 case 8: 2387 addrNop8(); 2388 break; 2389 case 7: 2390 addrNop7(); 2391 break; 2392 case 6: 2393 emitByte(0x66); // size prefix 2394 addrNop5(); 2395 break; 2396 case 5: 2397 addrNop5(); 2398 break; 2399 case 4: 2400 addrNop4(); 2401 break; 2402 case 3: 2403 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2404 emitByte(0x66); // size prefix 2405 emitByte(0x66); // size prefix 2406 emitByte(0x90); // nop 2407 break; 2408 case 2: 2409 emitByte(0x66); // size prefix 2410 emitByte(0x90); // nop 2411 break; 2412 case 1: 2413 emitByte(0x90); // nop 2414 break; 2415 default: 2416 assert i == 0; 2417 } 2418 return; 2419 } 2420 2421 // Using nops with size prefixes "0x66 0x90". 2422 // From AMD Optimization Guide: 2423 // 1: 0x90 2424 // 2: 0x66 0x90 2425 // 3: 0x66 0x66 0x90 2426 // 4: 0x66 0x66 0x66 0x90 2427 // 5: 0x66 0x66 0x90 0x66 0x90 2428 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2429 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2430 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2431 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2432 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2433 // 2434 while (i > 12) { 2435 i -= 4; 2436 emitByte(0x66); // size prefix 2437 emitByte(0x66); 2438 emitByte(0x66); 2439 emitByte(0x90); // nop 2440 } 2441 // 1 - 12 nops 2442 if (i > 8) { 2443 if (i > 9) { 2444 i -= 1; 2445 emitByte(0x66); 2446 } 2447 i -= 3; 2448 emitByte(0x66); 2449 emitByte(0x66); 2450 emitByte(0x90); 2451 } 2452 // 1 - 8 nops 2453 if (i > 4) { 2454 if (i > 6) { 2455 i -= 1; 2456 emitByte(0x66); 2457 } 2458 i -= 3; 2459 emitByte(0x66); 2460 emitByte(0x66); 2461 emitByte(0x90); 2462 } 2463 switch (i) { 2464 case 4: 2465 emitByte(0x66); 2466 emitByte(0x66); 2467 emitByte(0x66); 2468 emitByte(0x90); 2469 break; 2470 case 3: 2471 emitByte(0x66); 2472 emitByte(0x66); 2473 emitByte(0x90); 2474 break; 2475 case 2: 2476 emitByte(0x66); 2477 emitByte(0x90); 2478 break; 2479 case 1: 2480 emitByte(0x90); 2481 break; 2482 default: 2483 assert i == 0; 2484 } 2485 } 2486 2487 public final void orl(Register dst, Register src) { 2488 OR.rmOp.emit(this, DWORD, dst, src); 2489 } 2490 2491 public final void orl(Register dst, int imm32) { 2492 OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2493 } 2494 2495 // Insn: VPACKUSWB xmm1, xmm2, xmm3/m128 2496 // ----- 2497 // Insn: VPACKUSWB xmm1, xmm1, xmm2 2498 2499 public final void packuswb(Register dst, Register src) { 2500 assert inRC(XMM, dst) && inRC(XMM, src); 2501 // Code: VEX.NDS.128.66.0F.WIG 67 /r 2502 simdPrefix(dst, dst, src, PD, P_0F, false); 2503 emitByte(0x67); 2504 emitModRM(dst, src); 2505 } 2506 2507 public final void pop(Register dst) { 2508 prefix(dst); 2509 emitByte(0x58 + encode(dst)); 2510 } 2511 2512 public void popfq() { 2513 emitByte(0x9D); 2514 } 2515 2516 public final void ptest(Register dst, Register src) { 2517 assert supports(CPUFeature.SSE4_1); 2518 assert inRC(XMM, dst) && inRC(XMM, src); 2519 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2520 emitByte(0x17); 2521 emitModRM(dst, src); 2522 } 2523 2524 public final void pcmpeqb(Register dst, Register src) { 2525 assert supports(CPUFeature.SSE2); 2526 assert inRC(XMM, dst) && inRC(XMM, src); 2527 simdPrefix(dst, dst, src, PD, P_0F, false); 2528 emitByte(0x74); 2529 emitModRM(dst, 
src); 2530 } 2531 2532 public final void pcmpeqw(Register dst, Register src) { 2533 assert supports(CPUFeature.SSE2); 2534 assert inRC(XMM, dst) && inRC(XMM, src); 2535 simdPrefix(dst, dst, src, PD, P_0F, false); 2536 emitByte(0x75); 2537 emitModRM(dst, src); 2538 } 2539 2540 public final void pcmpeqd(Register dst, Register src) { 2541 assert supports(CPUFeature.SSE2); 2542 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2543 simdPrefix(dst, dst, src, PD, P_0F, false); 2544 emitByte(0x76); 2545 emitModRM(dst, src); 2546 } 2547 2548 public final void pcmpestri(Register dst, AMD64Address src, int imm8) { 2549 assert supports(CPUFeature.SSE4_2); 2550 assert inRC(XMM, dst); 2551 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2552 emitByte(0x61); 2553 emitOperandHelper(dst, src, 0); 2554 emitByte(imm8); 2555 } 2556 2557 public final void pcmpestri(Register dst, Register src, int imm8) { 2558 assert supports(CPUFeature.SSE4_2); 2559 assert inRC(XMM, dst) && inRC(XMM, src); 2560 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2561 emitByte(0x61); 2562 emitModRM(dst, src); 2563 emitByte(imm8); 2564 } 2565 2566 public final void pmovmskb(Register dst, Register src) { 2567 assert supports(CPUFeature.SSE2); 2568 assert inRC(CPU, dst) && inRC(XMM, src); 2569 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2570 emitByte(0xD7); 2571 emitModRM(dst, src); 2572 } 2573 2574 private void pmovSZx(Register dst, AMD64Address src, int op) { 2575 assert supports(CPUFeature.SSE4_1); 2576 assert inRC(XMM, dst); 2577 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2578 emitByte(op); 2579 emitOperandHelper(dst, src, 0); 2580 } 2581 2582 public final void pmovsxbw(Register dst, AMD64Address src) { 2583 pmovSZx(dst, src, 0x20); 2584 } 2585 2586 public final void pmovsxbd(Register dst, AMD64Address src) { 2587 pmovSZx(dst, src, 0x21); 2588 } 2589 2590 public final void pmovsxbq(Register dst, AMD64Address src) { 2591 pmovSZx(dst, src, 0x22); 2592 } 2593 2594 public final void pmovsxwd(Register dst, AMD64Address src) { 2595 pmovSZx(dst, src, 0x23); 2596 } 2597 2598 public final void pmovsxwq(Register dst, AMD64Address src) { 2599 pmovSZx(dst, src, 0x24); 2600 } 2601 2602 public final void pmovsxdq(Register dst, AMD64Address src) { 2603 pmovSZx(dst, src, 0x25); 2604 } 2605 2606 // Insn: VPMOVZXBW xmm1, xmm2/m64 2607 public final void pmovzxbw(Register dst, AMD64Address src) { 2608 pmovSZx(dst, src, 0x30); 2609 } 2610 2611 public final void pmovzxbd(Register dst, AMD64Address src) { 2612 pmovSZx(dst, src, 0x31); 2613 } 2614 2615 public final void pmovzxbq(Register dst, AMD64Address src) { 2616 pmovSZx(dst, src, 0x32); 2617 } 2618 2619 public final void pmovzxwd(Register dst, AMD64Address src) { 2620 pmovSZx(dst, src, 0x33); 2621 } 2622 2623 public final void pmovzxwq(Register dst, AMD64Address src) { 2624 pmovSZx(dst, src, 0x34); 2625 } 2626 2627 public final void pmovzxdq(Register dst, AMD64Address src) { 2628 pmovSZx(dst, src, 0x35); 2629 } 2630 2631 public final void pmovzxbw(Register dst, Register src) { 2632 assert supports(CPUFeature.SSE4_1); 2633 assert inRC(XMM, dst) && inRC(XMM, src); 2634 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2635 emitByte(0x30); 2636 emitModRM(dst, src); 2637 } 2638 2639 public final void push(Register src) { 2640 prefix(src); 2641 emitByte(0x50 + encode(src)); 2642 } 2643 2644 public void pushfq() { 2645 emitByte(0x9c); 2646 } 2647 2648 public final void paddd(Register dst, Register src) { 2649 assert 
inRC(XMM, dst) && inRC(XMM, src); 2650 simdPrefix(dst, dst, src, PD, P_0F, false); 2651 emitByte(0xFE); 2652 emitModRM(dst, src); 2653 } 2654 2655 public final void paddq(Register dst, Register src) { 2656 assert inRC(XMM, dst) && inRC(XMM, src); 2657 simdPrefix(dst, dst, src, PD, P_0F, false); 2658 emitByte(0xD4); 2659 emitModRM(dst, src); 2660 } 2661 2662 public final void pextrw(Register dst, Register src, int imm8) { 2663 assert inRC(CPU, dst) && inRC(XMM, src); 2664 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2665 emitByte(0xC5); 2666 emitModRM(dst, src); 2667 emitByte(imm8); 2668 } 2669 2670 public final void pinsrw(Register dst, Register src, int imm8) { 2671 assert inRC(XMM, dst) && inRC(CPU, src); 2672 simdPrefix(dst, dst, src, PD, P_0F, false); 2673 emitByte(0xC4); 2674 emitModRM(dst, src); 2675 emitByte(imm8); 2676 } 2677 2678 public final void por(Register dst, Register src) { 2679 assert inRC(XMM, dst) && inRC(XMM, src); 2680 simdPrefix(dst, dst, src, PD, P_0F, false); 2681 emitByte(0xEB); 2682 emitModRM(dst, src); 2683 } 2684 2685 public final void pand(Register dst, Register src) { 2686 assert inRC(XMM, dst) && inRC(XMM, src); 2687 simdPrefix(dst, dst, src, PD, P_0F, false); 2688 emitByte(0xDB); 2689 emitModRM(dst, src); 2690 } 2691 2692 public final void pxor(Register dst, Register src) { 2693 assert inRC(XMM, dst) && inRC(XMM, src); 2694 simdPrefix(dst, dst, src, PD, P_0F, false); 2695 emitByte(0xEF); 2696 emitModRM(dst, src); 2697 } 2698 2699 public final void pslld(Register dst, int imm8) { 2700 assert isUByte(imm8) : "invalid value"; 2701 assert inRC(XMM, dst); 2702 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 2703 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2704 emitByte(0x72); 2705 emitModRM(6, dst); 2706 emitByte(imm8 & 0xFF); 2707 } 2708 2709 public final void psllq(Register dst, Register shift) { 2710 assert inRC(XMM, dst) && inRC(XMM, shift); 2711 simdPrefix(dst, dst, shift, PD, P_0F, false); 2712 emitByte(0xF3); 2713 emitModRM(dst, shift); 2714 } 2715 2716 public final void psllq(Register dst, int imm8) { 2717 assert isUByte(imm8) : "invalid value"; 2718 assert inRC(XMM, dst); 2719 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 2720 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2721 emitByte(0x73); 2722 emitModRM(6, dst); 2723 emitByte(imm8); 2724 } 2725 2726 public final void psrad(Register dst, int imm8) { 2727 assert isUByte(imm8) : "invalid value"; 2728 assert inRC(XMM, dst); 2729 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 2730 simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false); 2731 emitByte(0x72); 2732 emitModRM(4, dst); 2733 emitByte(imm8); 2734 } 2735 2736 public final void psrld(Register dst, int imm8) { 2737 assert isUByte(imm8) : "invalid value"; 2738 assert inRC(XMM, dst); 2739 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 2740 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2741 emitByte(0x72); 2742 emitModRM(2, dst); 2743 emitByte(imm8); 2744 } 2745 2746 public final void psrlq(Register dst, int imm8) { 2747 assert isUByte(imm8) : "invalid value"; 2748 assert inRC(XMM, dst); 2749 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 2750 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2751 emitByte(0x73); 2752 emitModRM(2, dst); 2753 emitByte(imm8); 2754 } 2755 2756 public final void psrldq(Register dst, int imm8) { 2757 assert isUByte(imm8) : "invalid value"; 2758 assert inRC(XMM, dst); 2759 simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false); 2760 emitByte(0x73); 2761 emitModRM(3, dst); 2762 emitByte(imm8); 2763 } 2764 2765 public final void 
pshufb(Register dst, Register src) { 2766 assert supports(CPUFeature.SSSE3); 2767 assert inRC(XMM, dst) && inRC(XMM, src); 2768 simdPrefix(dst, dst, src, PD, P_0F38, false); 2769 emitByte(0x00); 2770 emitModRM(dst, src); 2771 } 2772 2773 public final void pshuflw(Register dst, Register src, int imm8) { 2774 assert supports(CPUFeature.SSE2); 2775 assert isUByte(imm8) : "invalid value"; 2776 assert inRC(XMM, dst) && inRC(XMM, src); 2777 simdPrefix(dst, Register.None, src, SD, P_0F, false); 2778 emitByte(0x70); 2779 emitModRM(dst, src); 2780 emitByte(imm8); 2781 } 2782 2783 public final void pshufd(Register dst, Register src, int imm8) { 2784 assert isUByte(imm8) : "invalid value"; 2785 assert inRC(XMM, dst) && inRC(XMM, src); 2786 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2787 emitByte(0x70); 2788 emitModRM(dst, src); 2789 emitByte(imm8); 2790 } 2791 2792 public final void psubd(Register dst, Register src) { 2793 assert inRC(XMM, dst) && inRC(XMM, src); 2794 simdPrefix(dst, dst, src, PD, P_0F, false); 2795 emitByte(0xFA); 2796 emitModRM(dst, src); 2797 } 2798 2799 public final void punpcklbw(Register dst, Register src) { 2800 assert supports(CPUFeature.SSE2); 2801 assert inRC(XMM, dst) && inRC(XMM, src); 2802 simdPrefix(dst, dst, src, PD, P_0F, false); 2803 emitByte(0x60); 2804 emitModRM(dst, src); 2805 } 2806 2807 public final void rcpps(Register dst, Register src) { 2808 assert inRC(XMM, dst) && inRC(XMM, src); 2809 simdPrefix(dst, Register.None, src, PS, P_0F, false); 2810 emitByte(0x53); 2811 emitModRM(dst, src); 2812 } 2813 2814 public final void ret(int imm16) { 2815 if (imm16 == 0) { 2816 emitByte(0xC3); 2817 } else { 2818 emitByte(0xC2); 2819 emitShort(imm16); 2820 } 2821 } 2822 2823 public final void sarl(Register dst, int imm8) { 2824 prefix(dst); 2825 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2826 if (imm8 == 1) { 2827 emitByte(0xD1); 2828 emitModRM(7, dst); 2829 } else { 2830 emitByte(0xC1); 2831 emitModRM(7, dst); 2832 emitByte(imm8); 2833 } 2834 } 2835 2836 public final void shll(Register dst, int imm8) { 2837 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2838 prefix(dst); 2839 if (imm8 == 1) { 2840 emitByte(0xD1); 2841 emitModRM(4, dst); 2842 } else { 2843 emitByte(0xC1); 2844 emitModRM(4, dst); 2845 emitByte(imm8); 2846 } 2847 } 2848 2849 public final void shll(Register dst) { 2850 // Multiply dst by 2, CL times. 2851 prefix(dst); 2852 emitByte(0xD3); 2853 emitModRM(4, dst); 2854 } 2855 2856 // Insn: SHLX r32a, r/m32, r32b 2857 2858 public final void shlxl(Register dst, Register src1, Register src2) { 2859 VexGeneralPurposeRMVOp.SHLX.emit(this, AVXSize.DWORD, dst, src1, src2); 2860 } 2861 2862 public final void shrl(Register dst, int imm8) { 2863 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2864 prefix(dst); 2865 emitByte(0xC1); 2866 emitModRM(5, dst); 2867 emitByte(imm8); 2868 } 2869 2870 public final void shrl(Register dst) { 2871 // Unsigned divide dst by 2, CL times. 
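// Illustrative example (not part of the original comments): with CL == 3, shrl(rax)
// emits D3 /5 (SHR eax, CL) and divides the unsigned value in eax by 8.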
2872 prefix(dst); 2873 emitByte(0xD3); 2874 emitModRM(5, dst); 2875 } 2876 2877 public final void subl(AMD64Address dst, int imm32) { 2878 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2879 } 2880 2881 public final void subl(Register dst, int imm32) { 2882 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2883 } 2884 2885 public final void subl(Register dst, Register src) { 2886 SUB.rmOp.emit(this, DWORD, dst, src); 2887 } 2888 2889 public final void subpd(Register dst, Register src) { 2890 SSEOp.SUB.emit(this, PD, dst, src); 2891 } 2892 2893 public final void subsd(Register dst, Register src) { 2894 SSEOp.SUB.emit(this, SD, dst, src); 2895 } 2896 2897 public final void subsd(Register dst, AMD64Address src) { 2898 SSEOp.SUB.emit(this, SD, dst, src); 2899 } 2900 2901 public final void testl(Register dst, int imm32) { 2902 // not using emitArith because test 2903 // doesn't support sign-extension of 2904 // 8-bit operands 2905 if (dst.encoding == 0) { 2906 emitByte(0xA9); 2907 } else { 2908 prefix(dst); 2909 emitByte(0xF7); 2910 emitModRM(0, dst); 2911 } 2912 emitInt(imm32); 2913 } 2914 2915 public final void testl(Register dst, Register src) { 2916 prefix(dst, src); 2917 emitByte(0x85); 2918 emitModRM(dst, src); 2919 } 2920 2921 public final void testl(Register dst, AMD64Address src) { 2922 prefix(src, dst); 2923 emitByte(0x85); 2924 emitOperandHelper(dst, src, 0); 2925 } 2926 2927 public final void unpckhpd(Register dst, Register src) { 2928 assert inRC(XMM, dst) && inRC(XMM, src); 2929 simdPrefix(dst, dst, src, PD, P_0F, false); 2930 emitByte(0x15); 2931 emitModRM(dst, src); 2932 } 2933 2934 public final void unpcklpd(Register dst, Register src) { 2935 assert inRC(XMM, dst) && inRC(XMM, src); 2936 simdPrefix(dst, dst, src, PD, P_0F, false); 2937 emitByte(0x14); 2938 emitModRM(dst, src); 2939 } 2940 2941 public final void xorl(Register dst, Register src) { 2942 XOR.rmOp.emit(this, DWORD, dst, src); 2943 } 2944 2945 public final void xorq(Register dst, Register src) { 2946 XOR.rmOp.emit(this, QWORD, dst, src); 2947 } 2948 2949 public final void xorpd(Register dst, Register src) { 2950 SSEOp.XOR.emit(this, PD, dst, src); 2951 } 2952 2953 public final void xorps(Register dst, Register src) { 2954 SSEOp.XOR.emit(this, PS, dst, src); 2955 } 2956 2957 protected final void decl(Register dst) { 2958 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2959 prefix(dst); 2960 emitByte(0xFF); 2961 emitModRM(1, dst); 2962 } 2963 2964 protected final void incl(Register dst) { 2965 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2966 prefix(dst); 2967 emitByte(0xFF); 2968 emitModRM(0, dst); 2969 } 2970 2971 public final void addq(Register dst, int imm32) { 2972 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2973 } 2974 2975 public final void addq(AMD64Address dst, int imm32) { 2976 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2977 } 2978 2979 public final void addq(Register dst, Register src) { 2980 ADD.rmOp.emit(this, QWORD, dst, src); 2981 } 2982 2983 public final void addq(AMD64Address dst, Register src) { 2984 ADD.mrOp.emit(this, QWORD, dst, src); 2985 } 2986 2987 public final void andq(Register dst, int imm32) { 2988 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2989 } 2990 2991 public final void bsrq(Register dst, Register src) { 2992 prefixq(dst, src); 2993 emitByte(0x0F); 2994 emitByte(0xBD); 2995 emitModRM(dst, src); 2996 } 2997 2998 public final void
bswapq(Register reg) { 2999 prefixq(reg); 3000 emitByte(0x0F); 3001 emitByte(0xC8 + encode(reg)); 3002 } 3003 3004 public final void cdqq() { 3005 rexw(); 3006 emitByte(0x99); 3007 } 3008 3009 public final void repStosb() { 3010 emitByte(0xf3); 3011 rexw(); 3012 emitByte(0xaa); 3013 } 3014 3015 public final void repStosq() { 3016 emitByte(0xf3); 3017 rexw(); 3018 emitByte(0xab); 3019 } 3020 3021 public final void cmovq(ConditionFlag cc, Register dst, Register src) { 3022 prefixq(dst, src); 3023 emitByte(0x0F); 3024 emitByte(0x40 | cc.getValue()); 3025 emitModRM(dst, src); 3026 } 3027 3028 public final void setb(ConditionFlag cc, Register dst) { 3029 prefix(dst, true); 3030 emitByte(0x0F); 3031 emitByte(0x90 | cc.getValue()); 3032 emitModRM(0, dst); 3033 } 3034 3035 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) { 3036 prefixq(src, dst); 3037 emitByte(0x0F); 3038 emitByte(0x40 | cc.getValue()); 3039 emitOperandHelper(dst, src, 0); 3040 } 3041 3042 public final void cmpq(Register dst, int imm32) { 3043 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3044 } 3045 3046 public final void cmpq(Register dst, Register src) { 3047 CMP.rmOp.emit(this, QWORD, dst, src); 3048 } 3049 3050 public final void cmpq(Register dst, AMD64Address src) { 3051 CMP.rmOp.emit(this, QWORD, dst, src); 3052 } 3053 3054 public final void cmpxchgq(Register reg, AMD64Address adr) { 3055 prefixq(adr, reg); 3056 emitByte(0x0F); 3057 emitByte(0xB1); 3058 emitOperandHelper(reg, adr, 0); 3059 } 3060 3061 public final void cvtdq2pd(Register dst, Register src) { 3062 assert inRC(XMM, dst) && inRC(XMM, src); 3063 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3064 emitByte(0xE6); 3065 emitModRM(dst, src); 3066 } 3067 3068 public final void cvtsi2sdq(Register dst, Register src) { 3069 SSEOp.CVTSI2SD.emit(this, QWORD, dst, src); 3070 } 3071 3072 public final void cvttsd2siq(Register dst, Register src) { 3073 SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src); 3074 } 3075 3076 public final void cvttpd2dq(Register dst, Register src) { 3077 assert inRC(XMM, dst) && inRC(XMM, src); 3078 simdPrefix(dst, Register.None, src, PD, P_0F, false); 3079 emitByte(0xE6); 3080 emitModRM(dst, src); 3081 } 3082 3083 public final void decq(Register dst) { 3084 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3085 prefixq(dst); 3086 emitByte(0xFF); 3087 emitModRM(1, dst); 3088 } 3089 3090 public final void decq(AMD64Address dst) { 3091 DEC.emit(this, QWORD, dst); 3092 } 3093 3094 public final void imulq(Register dst, Register src) { 3095 prefixq(dst, src); 3096 emitByte(0x0F); 3097 emitByte(0xAF); 3098 emitModRM(dst, src); 3099 } 3100 3101 public final void incq(Register dst) { 3102 // Don't use it directly. Use the macro-assembler incrementq() instead.
3103 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3104 prefixq(dst); 3105 emitByte(0xFF); 3106 emitModRM(0, dst); 3107 } 3108 3109 public final void incq(AMD64Address dst) { 3110 INC.emit(this, QWORD, dst); 3111 } 3112 3113 public final void movq(Register dst, long imm64) { 3114 movq(dst, imm64, false); 3115 } 3116 3117 public final void movq(Register dst, long imm64, boolean annotateImm) { 3118 int insnPos = position(); 3119 prefixq(dst); 3120 emitByte(0xB8 + encode(dst)); 3121 int immPos = position(); 3122 emitLong(imm64); 3123 int nextInsnPos = position(); 3124 if (annotateImm && codePatchingAnnotationConsumer != null) { 3125 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 3126 } 3127 } 3128 3129 public final void movslq(Register dst, int imm32) { 3130 prefixq(dst); 3131 emitByte(0xC7); 3132 emitModRM(0, dst); 3133 emitInt(imm32); 3134 } 3135 3136 public final void movdq(Register dst, AMD64Address src) { 3137 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3138 } 3139 3140 public final void movdq(AMD64Address dst, Register src) { 3141 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3142 } 3143 3144 public final void movdq(Register dst, Register src) { 3145 if (inRC(XMM, dst) && inRC(CPU, src)) { 3146 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3147 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3148 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3149 } else { 3150 throw new InternalError("should not reach here"); 3151 } 3152 } 3153 3154 public final void movdl(Register dst, Register src) { 3155 if (inRC(XMM, dst) && inRC(CPU, src)) { 3156 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3157 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3158 AMD64MROp.MOVD.emit(this, DWORD, dst, src); 3159 } else { 3160 throw new InternalError("should not reach here"); 3161 } 3162 } 3163 3164 public final void movdl(Register dst, AMD64Address src) { 3165 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3166 } 3167 3168 public final void movddup(Register dst, Register src) { 3169 assert supports(CPUFeature.SSE3); 3170 assert inRC(XMM, dst) && inRC(XMM, src); 3171 simdPrefix(dst, Register.None, src, SD, P_0F, false); 3172 emitByte(0x12); 3173 emitModRM(dst, src); 3174 } 3175 3176 public final void movdqu(Register dst, AMD64Address src) { 3177 assert inRC(XMM, dst); 3178 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3179 emitByte(0x6F); 3180 emitOperandHelper(dst, src, 0); 3181 } 3182 3183 public final void movdqu(Register dst, Register src) { 3184 assert inRC(XMM, dst) && inRC(XMM, src); 3185 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3186 emitByte(0x6F); 3187 emitModRM(dst, src); 3188 } 3189 3190 // Insn: VMOVDQU xmm2/m128, xmm1 3191 3192 public final void movdqu(AMD64Address dst, Register src) { 3193 assert inRC(XMM, src); 3194 // Code: VEX.128.F3.0F.WIG 7F /r 3195 simdPrefix(src, Register.None, dst, SS, P_0F, false); 3196 emitByte(0x7F); 3197 emitOperandHelper(src, dst, 0); 3198 } 3199 3200 public final void movslq(AMD64Address dst, int imm32) { 3201 prefixq(dst); 3202 emitByte(0xC7); 3203 emitOperandHelper(0, dst, 4); 3204 emitInt(imm32); 3205 } 3206 3207 public final void movslq(Register dst, AMD64Address src) { 3208 prefixq(src, dst); 3209 emitByte(0x63); 3210 emitOperandHelper(dst, src, 0); 3211 } 3212 3213 public final void movslq(Register dst, Register src) { 3214 prefixq(dst, src); 3215 emitByte(0x63); 3216 emitModRM(dst, src); 3217 } 3218 3219 public final void negq(Register dst) { 3220 prefixq(dst);
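// NEG r/m64 is encoded as REX.W + F7 /3: the 0xF7 opcode byte below, with 3 in the
// ModRM reg field.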

    public final void negq(Register dst) {
        prefixq(dst);
        emitByte(0xF7);
        emitModRM(3, dst);
    }

    public final void orq(Register dst, Register src) {
        OR.rmOp.emit(this, QWORD, dst, src);
    }

    public final void shlq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(4, dst);
        } else {
            emitByte(0xC1);
            emitModRM(4, dst);
            emitByte(imm8);
        }
    }

    public final void shlq(Register dst) {
        // Multiply dst by 2, CL times.
        prefixq(dst);
        emitByte(0xD3);
        emitModRM(4, dst);
    }

    public final void shrq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(5, dst);
        } else {
            emitByte(0xC1);
            emitModRM(5, dst);
            emitByte(imm8);
        }
    }

    public final void shrq(Register dst) {
        // Unsigned divide dst by 2, CL times.
        prefixq(dst);
        emitByte(0xD3);
        emitModRM(5, dst);
    }

    public final void sarq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(7, dst);
        } else {
            emitByte(0xC1);
            emitModRM(7, dst);
            emitByte(imm8);
        }
    }

    public final void sbbq(Register dst, Register src) {
        SBB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void subq(Register dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subq(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subqWide(Register dst, int imm32) {
        // don't use the sign-extending version, forcing a 32-bit immediate
        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
    }

    public final void subq(Register dst, Register src) {
        SUB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void testq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x85);
        emitModRM(dst, src);
    }

    public final void btrq(Register src, int imm8) {
        prefixq(src);
        emitByte(0x0F);
        emitByte(0xBA);
        emitModRM(6, src);
        emitByte(imm8);
    }

    public final void xaddb(AMD64Address dst, Register src) {
        prefixb(dst, src);
        emitByte(0x0F);
        emitByte(0xC0);
        emitOperandHelper(src, dst, 0);
    }
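
    // Encoding note for the shift methods above: shift-by-1 has a dedicated opcode (0xD1 /reg)
    // that is one byte shorter than the immediate form (0xC1 /reg ib), so shlq(rax, 1) emits
    // 48 D1 E0 while shlq(rax, 2) emits 48 C1 E0 02. The asserts pass imm8 >> 1 because
    // isShiftCount accepts the 32-bit range 0..31, while 64-bit shifts allow counts up to 63.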

    public final void xaddw(AMD64Address dst, Register src) {
        emitByte(0x66); // Switch to 16-bit mode.
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddq(AMD64Address dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xchgb(Register dst, AMD64Address src) {
        prefixb(src, dst);
        emitByte(0x86);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgw(Register dst, AMD64Address src) {
        emitByte(0x66);
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void membar(int barriers) {
        if (target.isMP) {
            // We only have to handle StoreLoad.
            if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions, which suffice as barriers and
                // are much faster than the alternative of using the cpuid instruction. We use a
                // locked "add [rsp], 0" here, which is conveniently otherwise a no-op except for
                // clobbering the flags.
                // Any change to this code may need to revisit other places where this idiom is
                // used, in particular the orderAccess code.
                lock();
                addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here.
            }
        }
    }

    @Override
    protected final void patchJumpTarget(int branch, int branchTarget) {
        int op = getByte(branch);
        assert op == 0xE8 // call
                        || op == 0x00 // jump table entry
                        || op == 0xE9 // jmp
                        || op == 0xEB // short jmp
                        || (op & 0xF0) == 0x70 // short jcc
                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
                        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

        if (op == 0x00) {
            int offsetToJumpTableBase = getShort(branch + 1);
            int jumpTableBase = branch - offsetToJumpTableBase;
            int imm32 = branchTarget - jumpTableBase;
            emitInt(imm32, branch);
        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
            // Short offset operators (jmp and jcc).
            final int imm8 = branchTarget - (branch + 2);
            /*
             * Since a wrongly patched short branch can potentially lead to code that works but
             * behaves very badly, we always fail with an exception here instead of relying on an
             * assert.
             */
            GraalError.guarantee(isByte(imm8), "Displacement too large to be encoded as a byte: %d", imm8);
            emitByte(imm8, branch + 1);
        } else {
            int off = 1;
            if (op == 0x0F) {
                off = 2;
            }
            int imm32 = branchTarget - (branch + 4 + off);
            emitInt(imm32, branch + off);
        }
    }

    public void nullCheck(AMD64Address address) {
        testl(AMD64.rax, address);
    }

    @Override
    public void align(int modulus) {
        if (position() % modulus != 0) {
            nop(modulus - (position() % modulus));
        }
    }
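
    // Worked example for patchJumpTarget above: a long jcc is encoded as 0F 8x rel32, so its
    // opcode takes two bytes (off == 2), the rel32 field starts at branch + 2, and the branch is
    // taken relative to the end of the 6-byte instruction; hence
    // imm32 = branchTarget - (branch + 4 + off) = branchTarget - (branch + 6).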

    /**
     * Emits a direct call instruction. Note that the actual call target is not specified, because
     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the caller
     * is responsible for adding the call address to the appropriate patching tables.
     */
    public final void call() {
        annotatePatchingImmediate(1, 4);
        emitByte(0xE8);
        emitInt(0);
    }

    public final void call(Register src) {
        prefix(src);
        emitByte(0xFF);
        emitModRM(2, src);
    }

    public final void int3() {
        emitByte(0xCC);
    }

    public final void pause() {
        emitByte(0xF3);
        emitByte(0x90);
    }

    private void emitx87(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal stack offset";
        emitByte(b1);
        emitByte(b2 + i);
    }

    public final void fldd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(0, src, 0);
    }

    public final void flds(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(0, src, 0);
    }

    public final void fldln2() {
        emitByte(0xD9);
        emitByte(0xED);
    }

    public final void fldlg2() {
        emitByte(0xD9);
        emitByte(0xEC);
    }

    public final void fyl2x() {
        emitByte(0xD9);
        emitByte(0xF1);
    }

    public final void fstps(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(3, src, 0);
    }

    public final void fstpd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(3, src, 0);
    }

    private void emitFPUArith(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
        emitByte(b1);
        emitByte(b2 + i);
    }

    public void ffree(int i) {
        emitFPUArith(0xDD, 0xC0, i);
    }

    public void fincstp() {
        emitByte(0xD9);
        emitByte(0xF7);
    }

    public void fxch(int i) {
        emitFPUArith(0xD9, 0xC8, i);
    }

    public void fnstswAX() {
        emitByte(0xDF);
        emitByte(0xE0);
    }

    public void fwait() {
        emitByte(0x9B);
    }

    public void fprem() {
        emitByte(0xD9);
        emitByte(0xF8);
    }

    public final void fsin() {
        emitByte(0xD9);
        emitByte(0xFE);
    }

    public final void fcos() {
        emitByte(0xD9);
        emitByte(0xFF);
    }

    public final void fptan() {
        emitByte(0xD9);
        emitByte(0xF2);
    }

    public final void fstp(int i) {
        emitx87(0xDD, 0xD8, i);
    }

    @Override
    public AMD64Address makeAddress(Register base, int displacement) {
        return new AMD64Address(base, displacement);
    }

    @Override
    public AMD64Address getPlaceholder(int instructionStartPosition) {
        return new AMD64Address(AMD64.rip, Register.None, Scale.Times1, 0, instructionStartPosition);
    }

    private void prefetchPrefix(AMD64Address src) {
        prefix(src);
        emitByte(0x0F);
    }

    public void prefetchnta(AMD64Address src) {
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(0, src, 0);
    }

    void prefetchr(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(0, src, 0);
    }

    public void prefetcht0(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(1, src, 0);
    }
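
    // Encoding note: the SSE prefetch hints in this group all share opcode 0F 18 and are
    // distinguished only by the ModRM reg field passed to emitOperandHelper: /0 selects
    // prefetchnta, /1 prefetcht0, /2 prefetcht1 and /3 prefetcht2. The 3DNow! variants
    // (prefetchr, prefetchw) use opcode 0F 0D instead.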

    public void prefetcht1(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(2, src, 0);
    }

    public void prefetcht2(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(3, src, 0);
    }

    public void prefetchw(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(1, src, 0);
    }

    public void rdtsc() {
        emitByte(0x0F);
        emitByte(0x31);
    }

    /**
     * Emits an instruction which is considered to be illegal. This is used when we deliberately
     * want to crash the program (e.g. for debugging).
     */
    public void illegal() {
        emitByte(0x0f);
        emitByte(0x0b);
    }

    public void lfence() {
        emitByte(0x0f);
        emitByte(0xae);
        emitByte(0xe8);
    }

    public final void vptest(Register dst, Register src) {
        VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpxor(Register dst, Register nds, Register src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vpxor(Register dst, Register nds, AMD64Address src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vmovdqu(AMD64Address dst, Register src) {
        assert inRC(XMM, src);
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX2);
        VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vzeroupper() {
        emitVEX(L128, P_, M_0F, W0, 0, 0, true);
        emitByte(0x77);
    }

    // Insn: KORTESTD k1, k2
    // This instruction produces ZF or CF flags.

    public final void kortestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }

    // Insn: KORTESTQ k1, k2
    // This instruction produces ZF or CF flags.

    public final void kortestq(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }
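
    // Semantics note: KORTEST ORs the two mask operands and sets ZF if the result is all zeros
    // and CF if it is all ones. A typical use (sketch, assuming a Label named done) is
    // kortestd(k1, k1) followed by jcc(ConditionFlag.Zero, done) to test whether a predicate
    // mask is empty.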

    public final void kmovd(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovd(KRegister dst, KRegister src):
                // Insn: KMOVD k1, k2/m32
                // Code: VEX.L0.66.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovd(KRegister dst, Register src)
                // Insn: KMOVD k1, r32
                // Code: VEX.L0.F2.0F.W0 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovd(Register dst, KRegister src)
                // Insn: KMOVD r32, k1
                // Code: VEX.L0.F2.0F.W0 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    public final void kmovq(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovq(KRegister dst, KRegister src):
                // Insn: KMOVQ k1, k2/m64
                // Code: VEX.L0.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovq(KRegister dst, Register src)
                // Insn: KMOVQ k1, r64
                // Code: VEX.L0.F2.0F.W1 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovq(Register dst, KRegister src)
                // Insn: KMOVQ r64, k1
                // Code: VEX.L0.F2.0F.W1 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    // Insn: KTESTD k1, k2

    public final void ktestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 99 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x99);
        emitModRM(src1, src2);
    }

    public final void evmovdqu64(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512F);
        assert inRC(XMM, dst);
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1, m256

    public final void evpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(XMM, nds);
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
        emitByte(0x74);
        emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
    // -----
    // Insn: VMOVDQU16 zmm1, m512

    public final void evmovdqu16(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
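
    // Encoding note: EVEX memory operands use a compressed disp8*N displacement, where N depends
    // on the tuple type and vector length; for a full-vector-memory (FVM) access at ZMM width,
    // N is 64. That scaling factor is what EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)
    // supplies to emitEVEXOperandHelper in the methods above.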

    // Insn: VMOVDQU16 zmm1 {k1}{z}, m512

    public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(MASK, mask);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
    // -----
    // Insn: VMOVDQU16 m512, zmm1

    public final void evmovdqu16(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 m512, k1, zmm1

    public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
    // -----
    // Insn: VPBROADCASTW zmm1, reg

    public final void evpbroadcastw(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(CPU, src);
        // Code: EVEX.512.66.0F38.W0 7B /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
        emitByte(0x7B);
        emitModRM(dst, src);
    }

    // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
    // -----
    // Insn: VPCMPUW k1, zmm2, zmm3, imm8

    public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src);
        // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
        emitByte(0x3E);
        emitModRM(kdst, src);
        emitByte(vcc);
    }

    // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
    // -----
    // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8

    public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(MASK, mask);
        assert inRC(XMM, nds) && inRC(XMM, src);
        // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
        evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
        emitByte(0x3E);
        emitModRM(kdst, src);
        emitByte(vcc);
    }

    // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
    // -----
    // Insn: VPMOVWB m256, zmm2

    public final void evpmovwb(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
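
    // Usage note for evpcmpuw above: the trailing immediate selects the unsigned comparison
    // predicate (0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = NLT, 6 = NLE, per the AVX-512 VPCMPUW
    // definition), so e.g. evpcmpuw(k1, zmm0, zmm1, 1) computes, per word, k1 := zmm0 < zmm1.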

    // Insn: VPMOVWB m256, k1, zmm2

    public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
    // -----
    // Insn: VPMOVZXBW zmm1, k1, m256

    public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
}