/*
 * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends AMD64BaseAssembler {

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }
    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }
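    /*
     * Illustrative sketch (not part of the original sources): each ConditionFlag pairs an x86
     * condition-code nibble with a printable operator. The nibble is the low four bits of the
     * Jcc/SETcc/CMOVcc opcodes, e.g. Less has value 0xC, so a short `jl` encodes as
     * 0x70 | 0xC == 0x7C. negate() flips a condition without re-deriving the encoding:
     *
     *   ConditionFlag cond = ConditionFlag.Less;
     *   assert cond.negate() == ConditionFlag.GreaterEqual;
     *   assert cond.getValue() == 0xc;
     */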
    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
        WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
        WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
        QwordAssertion(CPU, CPU, QWORD),
        FloatAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
        FloatToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }

    }

    protected static final int P_0F = 0x0F;
    protected static final int P_0F38 = 0x380F;
    protected static final int P_0F3A = 0x3A0F;

    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.getSizePrefix() != 0) {
                asm.emitByte(size.getSizePrefix());
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        public OperandSize[] getAllowedSizes() {
            return assertion.allowedSizes;
        }

        protected final boolean isSSEInstruction() {
            if (feature == null) {
                return false;
            }
            switch (feature) {
                case SSE:
                case SSE2:
                case SSE3:
                case SSSE3:
                case SSE4A:
                case SSE4_1:
                case SSE4_2:
                    return true;
                default:
                    return false;
            }
        }

        public final OpAssertion getAssertion() {
            return assertion;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }
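    /*
     * Encoding note with a sketch (not part of the original sources): the extra REX test in
     * emitOpcode covers byte registers. When a REX prefix is present, register encodings 4-7
     * select spl/bpl/sil/dil rather than ah/ch/dh/bh, so byte-sized ops on those registers must
     * emit a bare REX (0x40) even when no REX bit is set. Assuming an AMD64Assembler `asm` in
     * scope, the byte-sized MOVB defined below would emit:
     *
     *   AMD64RMOp.MOVB.emit(asm, BYTE, AMD64.rsi, AMD64.rax);   // 40 8A F0   mov sil, al
     */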
    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            this(opcode, immIsByte, prefix, op, assertion, null);
        }

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, 0, prefix, op, assertion, feature);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.getBytes();
            }
        }
    }

    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
        public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
        public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
        // POPCNT, TZCNT, and LZCNT support word operation. However, the legacy size prefix should
        // be emitted before the mandatory prefix 0xF3. Since we are not emitting bit count for
        // 16-bit operands, here we simply use DwordOrLargerAssertion.
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, OpAssertion.DwordOrLargerAssertion, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, OpAssertion.DwordOrLargerAssertion, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, OpAssertion.DwordOrLargerAssertion, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordAssertion);
        public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);
        public static final AMD64RMOp CMP = new AMD64RMOp("CMP", 0x3B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);

        // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
        public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x10:
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
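    /*
     * Usage sketch (illustrative, not from the original sources), assuming an AMD64Assembler
     * `asm` in scope: RM ops read their second operand from a register or memory address.
     *
     *   AMD64RMOp.MOV.emit(asm, QWORD, AMD64.rax, new AMD64Address(AMD64.rsp, 16));
     *   // 48 8B 44 24 10   mov rax, [rsp + 16]
     */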
    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);

        // MOVD and MOVQ are the same opcode, just with different operand size prefix
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatAssertion must be used.
        public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x11:
                        if ((size == SS) || (size == SD)) {
                            nds = src;
                        }
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(src, nds, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(src, dst);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, src, null);
            if (isSSEInstruction()) {
                asm.simdPrefix(src, Register.None, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
            }
            asm.emitOperandHelper(src, dst, 0);
        }
    }
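    /*
     * Usage sketch (illustrative, not from the original sources): the MR forms mirror the RM
     * forms with the register/memory operand as the destination, e.g. a store of ecx, assuming
     * an AMD64Assembler `asm` in scope:
     *
     *   AMD64MROp.MOV.emit(asm, DWORD, new AMD64Address(AMD64.rbx), AMD64.rcx);
     *   // 89 0B   mov [rbx], ecx
     */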
    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
        public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
        public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
        public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        // POP cannot encode a 32-bit operand size in 64-bit mode, hence the WORD/QWORD assertion.
        public static final AMD64MOp POP = new AMD64MOp("POP", 0x8F, 0, OpAssertion.WordOrDwordAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }
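    /*
     * Usage sketch (illustrative, not from the original sources): M ops take a single operand
     * and store their sub-opcode in the ModRM reg field (the `ext` value). Assuming an
     * AMD64Assembler `asm` in scope:
     *
     *   AMD64MOp.NEG.emit(asm, QWORD, AMD64.rax);   // 48 F7 D8   neg rax
     */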
    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm, boolean annotateImm) {
            assert verify(asm, size, dst, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm, boolean annotateImm) {
            assert verify(asm, size, null, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }
    }
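    /*
     * Usage sketch (illustrative, not from the original sources), assuming an AMD64Assembler
     * `asm` in scope: MI ops pair the M operand with an immediate whose width follows the
     * operand size unless immIsByte is set.
     *
     *   AMD64MIOp.MOV.emit(asm, DWORD, AMD64.rax, 42);   // C7 C0 2A 00 00 00   mov eax, 42
     */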
    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of round, as the operation is always treated as having a
     * single-variant input, which makes extending it to 3-address forms redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion, null);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, immIsByte, prefix, op, assertion, feature);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
            }
            asm.emitOperandHelper(dst, src, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }
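    /*
     * Usage sketch (illustrative, not from the original sources), assuming an AMD64Assembler
     * `asm` in scope: IMUL_SX is the sign-extended imm8 variant (0x6B), so small constants
     * encode in three bytes:
     *
     *   AMD64RMIOp.IMUL_SX.emit(asm, DWORD, AMD64.rax, AMD64.rbx, 10);
     *   // 6B C3 0A   imul eax, ebx, 10
     */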
    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
        public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
        public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
        public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
        public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
        public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
        public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
        public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }

    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }
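    /*
     * Usage sketch (illustrative, not from the original sources), assuming an AMD64Assembler
     * `asm` in scope: callers pick the concrete opcode per operand size and immediate width,
     * preferring the sign-extended imm8 form when the immediate fits in a byte:
     *
     *   int imm = 7;
     *   AMD64MIOp op = AMD64BinaryArithmetic.CMP.getMIOpcode(DWORD, isByte(imm));
     *   op.emit(asm, DWORD, AMD64.rdx, imm);   // 83 FA 07   cmp edx, 7
     */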
    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }
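    /*
     * Usage sketch (illustrative, not from the original sources), assuming an AMD64Assembler
     * `asm` in scope: each shift exposes its three encodings directly; the imm8 form goes
     * through miOp:
     *
     *   AMD64Shift.SHL.miOp.emit(asm, DWORD, AMD64.rax, 3);   // C1 E0 03   shl eax, 3
     */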
    private enum VEXOpAssertion {
        AVX1(CPUFeature.AVX, CPUFeature.AVX),
        AVX1_2(CPUFeature.AVX, CPUFeature.AVX2),
        AVX2(CPUFeature.AVX2, CPUFeature.AVX2),
        AVX1_128ONLY(CPUFeature.AVX, null),
        AVX1_256ONLY(null, CPUFeature.AVX),
        AVX2_256ONLY(null, CPUFeature.AVX2),
        XMM_CPU(CPUFeature.AVX, null, XMM, null, CPU, null),
        XMM_XMM_CPU(CPUFeature.AVX, null, XMM, XMM, CPU, null),
        CPU_XMM(CPUFeature.AVX, null, CPU, null, XMM, null),
        AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, CPU, null, XMM, null),
        BMI1(CPUFeature.BMI1, null, CPU, CPU, CPU, null),
        BMI2(CPUFeature.BMI2, null, CPU, CPU, CPU, null);

        private final CPUFeature l128feature;
        private final CPUFeature l256feature;

        private final RegisterCategory rCategory;
        private final RegisterCategory vCategory;
        private final RegisterCategory mCategory;
        private final RegisterCategory imm8Category;

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature) {
            this(l128feature, l256feature, XMM, XMM, XMM, XMM);
        }

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory, RegisterCategory imm8Category) {
            this.l128feature = l128feature;
            this.l256feature = l256feature;
            this.rCategory = rCategory;
            this.vCategory = vCategory;
            this.mCategory = mCategory;
            this.imm8Category = imm8Category;
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
            return check(arch, getLFlag(size), r, v, m, null);
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
            return check(arch, getLFlag(size), r, v, m, imm8);
        }

        public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
            switch (l) {
                case L128:
                    assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
                    break;
                case L256:
                    assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";
                    break;
            }
            if (r != null) {
                assert r.getRegisterCategory().equals(rCategory);
            }
            if (v != null) {
                assert v.getRegisterCategory().equals(vCategory);
            }
            if (m != null) {
                assert m.getRegisterCategory().equals(mCategory);
            }
            if (imm8 != null) {
                assert imm8.getRegisterCategory().equals(imm8Category);
            }
            return true;
        }

        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
            switch (avxSize) {
                case XMM:
                    return l128feature != null && features.contains(l128feature);
                case YMM:
                    return l256feature != null && features.contains(l256feature);
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }
    }

    /**
     * Base class for VEX-encoded instructions.
     */
    public static class VexOp {
        protected final int pp;
        protected final int mmmmm;
        protected final int w;
        protected final int op;

        private final String opcode;
        protected final VEXOpAssertion assertion;

        protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            this.pp = pp;
            this.mmmmm = mmmmm;
            this.w = w;
            this.op = op;
            this.opcode = opcode;
            this.assertion = assertion;
        }

        public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
            return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size);
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
     */
    public static class VexRROp extends VexOp {
        // @formatter:off
        public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
        // @formatter:on

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            assert op != 0x1A || op != 0x5A;
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
        }
    }
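    /*
     * Usage sketch (illustrative, not from the original sources), assuming an AMD64Assembler
     * `asm` in scope: VEX ops should be guarded by isSupported, since the 128-bit and 256-bit
     * forms can require different CPU features (e.g. the integer ops defined below need AVX2
     * at 256 bit):
     *
     *   if (VexRVMOp.VPAND.isSupported(asm, AVXSize.YMM)) {
     *       VexRVMOp.VPAND.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2);
     *   }
     */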
    /**
     * VEX-encoded instructions with an operand order of RM.
     */
    public static class VexRMOp extends VexRROp {
        // @formatter:off
        public static final VexRMOp VCVTTSS2SI = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSS2SQ = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SI = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SQ = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTPS2PD = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTPD2PS = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTDQ2PS = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPS2DQ = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPD2DQ = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
        public static final VexRMOp VCVTDQ2PD = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
        public static final VexRMOp VBROADCASTSS = new VexRMOp("VBROADCASTSS", P_66, M_0F38, W0, 0x18);
        public static final VexRMOp VBROADCASTSD = new VexRMOp("VBROADCASTSD", P_66, M_0F38, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VBROADCASTF128 = new VexRMOp("VBROADCASTF128", P_66, M_0F38, W0, 0x1A, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VPBROADCASTI128 = new VexRMOp("VPBROADCASTI128", P_66, M_0F38, W0, 0x5A, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMOp VPBROADCASTB = new VexRMOp("VPBROADCASTB", P_66, M_0F38, W0, 0x78, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTW = new VexRMOp("VPBROADCASTW", P_66, M_0F38, W0, 0x79, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTD = new VexRMOp("VPBROADCASTD", P_66, M_0F38, W0, 0x58, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTQ = new VexRMOp("VPBROADCASTQ", P_66, M_0F38, W0, 0x59, VEXOpAssertion.AVX2);
        public static final VexRMOp VPMOVMSKB = new VexRMOp("VPMOVMSKB", P_66, M_0F, WIG, 0xD7, VEXOpAssertion.AVX1_2_CPU_XMM);
        public static final VexRMOp VPMOVSXBW = new VexRMOp("VPMOVSXBW", P_66, M_0F38, WIG, 0x20);
        public static final VexRMOp VPMOVSXBD = new VexRMOp("VPMOVSXBD", P_66, M_0F38, WIG, 0x21);
        public static final VexRMOp VPMOVSXBQ = new VexRMOp("VPMOVSXBQ", P_66, M_0F38, WIG, 0x22);
        public static final VexRMOp VPMOVSXWD = new VexRMOp("VPMOVSXWD", P_66, M_0F38, WIG, 0x23);
        public static final VexRMOp VPMOVSXWQ = new VexRMOp("VPMOVSXWQ", P_66, M_0F38, WIG, 0x24);
        public static final VexRMOp VPMOVSXDQ = new VexRMOp("VPMOVSXDQ", P_66, M_0F38, WIG, 0x25);
        public static final VexRMOp VPMOVZXBW = new VexRMOp("VPMOVZXBW", P_66, M_0F38, WIG, 0x30);
        public static final VexRMOp VPMOVZXBD = new VexRMOp("VPMOVZXBD", P_66, M_0F38, WIG, 0x31);
        public static final VexRMOp VPMOVZXBQ = new VexRMOp("VPMOVZXBQ", P_66, M_0F38, WIG, 0x32);
        public static final VexRMOp VPMOVZXWD = new VexRMOp("VPMOVZXWD", P_66, M_0F38, WIG, 0x33);
        public static final VexRMOp VPMOVZXWQ = new VexRMOp("VPMOVZXWQ", P_66, M_0F38, WIG, 0x34);
        public static final VexRMOp VPMOVZXDQ = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
        public static final VexRMOp VPTEST = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
        public static final VexRMOp VSQRTPD = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTPS = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSD = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSS = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
        public static final VexRMOp VUCOMISS = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
        public static final VexRMOp VUCOMISD = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
        // @formatter:on

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }
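    /*
     * Usage sketch (illustrative, not from the original sources), assuming an AMD64Assembler
     * `asm` in scope: the CPU_XMM conversions read an XMM source into a general-purpose
     * destination; the W1 variant selects a 64-bit result:
     *
     *   VexRMOp.VCVTTSD2SQ.emit(asm, AVXSize.XMM, AMD64.rax, AMD64.xmm0);
     *   // vcvttsd2si rax, xmm0
     */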
    /**
     * VEX-encoded move instructions.
     * <p>
     * These instructions have two opcodes: op is the forward move instruction with an operand
     * order of RM, and opReverse is the reverse move instruction with an operand order of MR.
     */
    public static final class VexMoveOp extends VexRMOp {
        // @formatter:off
        public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSS = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSD = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVD = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        public static final VexMoveOp VMOVQ = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private final int opReverse;

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }

        public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitModRM(src, dst);
        }
    }

    public interface VexRRIOp {
        void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
    }

    /**
     * VEX-encoded instructions with an operand order of RMI.
     */
    public static final class VexRMIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexRMIOp VPERMQ = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFD = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 1);
            asm.emitByte(imm8);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of MRI.
     */
    public static final class VexMRIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
        public static final VexMRIOp VPEXTRB = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRW = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRD = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRQ = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(src, dst);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(src, dst, 1);
            asm.emitByte(imm8);
        }
    }
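    /*
     * Usage sketch (illustrative, not from the original sources), assuming an AMD64Assembler
     * `asm` in scope: MRI ops write to their first operand, e.g. extracting the upper 128-bit
     * lane of a 256-bit register:
     *
     *   VexMRIOp.VEXTRACTF128.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, 1);
     *   // vextractf128 xmm0, ymm1, 1
     */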
    /**
     * VEX-encoded instructions with an operand order of RVMR.
     */
    public static class VexRVMROp extends VexOp {
        // @formatter:off
        public static final VexRVMROp VPBLENDVB = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
        public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
        public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
        // @formatter:on

        protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(mask.encoding() << 4);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
            asm.emitByte(mask.encoding() << 4);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RVM.
     */
    public static class VexRVMOp extends VexOp {
        // @formatter:off
        public static final VexRVMOp VANDPS = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDPD = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDNPS = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
        public static final VexRVMOp VANDNPD = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
        public static final VexRVMOp VORPS = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
        public static final VexRVMOp VORPD = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
        public static final VexRVMOp VXORPS = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
        public static final VexRVMOp VXORPD = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
        public static final VexRVMOp VADDPS = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDPD = new VexRVMOp("VADDPD", P_66, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSS = new VexRVMOp("VADDSS", P_F3, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSD = new VexRVMOp("VADDSD", P_F2, M_0F, WIG, 0x58);
        public static final VexRVMOp VMULPS = new VexRVMOp("VMULPS", P_, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULPD = new VexRVMOp("VMULPD", P_66, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSS = new VexRVMOp("VMULSS", P_F3, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSD = new VexRVMOp("VMULSD", P_F2, M_0F, WIG, 0x59);
        public static final VexRVMOp VSUBPS = new VexRVMOp("VSUBPS", P_, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBPD = new VexRVMOp("VSUBPD", P_66, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSS = new VexRVMOp("VSUBSS", P_F3, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSD = new VexRVMOp("VSUBSD", P_F2, M_0F, WIG, 0x5C);
        public static final VexRVMOp VMINPS = new VexRVMOp("VMINPS", P_, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINPD = new VexRVMOp("VMINPD", P_66, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSS = new VexRVMOp("VMINSS", P_F3, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSD = new VexRVMOp("VMINSD", P_F2, M_0F, WIG, 0x5D);
        public static final VexRVMOp VDIVPS = new VexRVMOp("VDIVPS", P_, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVPD = new VexRVMOp("VDIVPD", P_66, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSS = new VexRVMOp("VDIVSS", P_F3, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSD = new VexRVMOp("VDIVSD", P_F2, M_0F, WIG, 0x5E);
        public static final VexRVMOp VMAXPS = new VexRVMOp("VMAXPS", P_, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXPD = new VexRVMOp("VMAXPD", P_66, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSS = new VexRVMOp("VMAXSS", P_F3, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSD = new VexRVMOp("VMAXSD", P_F2, M_0F, WIG, 0x5F);
        public static final VexRVMOp VADDSUBPS = new VexRVMOp("VADDSUBPS", P_F2, M_0F, WIG, 0xD0);
        public static final VexRVMOp VADDSUBPD = new VexRVMOp("VADDSUBPD", P_66, M_0F, WIG, 0xD0);
        public static final VexRVMOp VPAND = new VexRVMOp("VPAND", P_66, M_0F, WIG, 0xDB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPOR = new VexRVMOp("VPOR", P_66, M_0F, WIG, 0xEB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPXOR = new VexRVMOp("VPXOR", P_66, M_0F, WIG, 0xEF, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDB = new VexRVMOp("VPADDB", P_66, M_0F, WIG, 0xFC, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDW = new VexRVMOp("VPADDW", P_66, M_0F, WIG, 0xFD, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDD = new VexRVMOp("VPADDD", P_66, M_0F, WIG, 0xFE, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDQ = new VexRVMOp("VPADDQ", P_66, M_0F, WIG, 0xD4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHUW = new VexRVMOp("VPMULHUW", P_66, M_0F, WIG, 0xE4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHW = new VexRVMOp("VPMULHW", P_66, M_0F, WIG, 0xE5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLW = new VexRVMOp("VPMULLW", P_66, M_0F, WIG, 0xD5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLD = new VexRVMOp("VPMULLD", P_66, M_0F38, WIG, 0x40, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBB = new VexRVMOp("VPSUBB", P_66, M_0F, WIG, 0xF8, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBW = new VexRVMOp("VPSUBW", P_66, M_0F, WIG, 0xF9, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBD = new VexRVMOp("VPSUBD", P_66, M_0F, WIG, 0xFA, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBQ = new VexRVMOp("VPSUBQ", P_66, M_0F, WIG, 0xFB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSHUFB = new VexRVMOp("VPSHUFB", P_66, M_0F38, WIG, 0x00, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VCVTSD2SS = new VexRVMOp("VCVTSD2SS", P_F2, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSS2SD = new VexRVMOp("VCVTSS2SD", P_F3, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSI2SD = new VexRVMOp("VCVTSI2SD", P_F2, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SD = new VexRVMOp("VCVTSQ2SD", P_F2, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSI2SS = new VexRVMOp("VCVTSI2SS", P_F3, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
VEXOpAssertion.XMM_XMM_CPU); 1303 public static final VexRVMOp VPCMPEQB = new VexRVMOp("VPCMPEQB", P_66, M_0F, WIG, 0x74, VEXOpAssertion.AVX1_2); 1304 public static final VexRVMOp VPCMPEQW = new VexRVMOp("VPCMPEQW", P_66, M_0F, WIG, 0x75, VEXOpAssertion.AVX1_2); 1305 public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2); 1306 public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2); 1307 public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2); 1308 public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2); 1309 public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2); 1310 public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2); 1311 // @formatter:on 1312 1313 private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) { 1314 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1315 } 1316 1317 protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1318 super(opcode, pp, mmmmm, w, op, assertion); 1319 } 1320 1321 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1322 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1323 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1324 asm.emitByte(op); 1325 asm.emitModRM(dst, src2); 1326 } 1327 1328 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1329 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1330 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1331 asm.emitByte(op); 1332 asm.emitOperandHelper(dst, src2, 0); 1333 } 1334 } 1335 1336 public static final class VexGeneralPurposeRVMOp extends VexRVMOp { 1337 // @formatter:off 1338 public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1); 1339 public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2); 1340 public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1341 public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1342 // @formatter:on 1343 1344 private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1345 super(opcode, pp, mmmmm, w, op, assertion); 1346 } 1347 1348 @Override 1349 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1350 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null); 1351 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1352 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1353 asm.emitByte(op); 1354 asm.emitModRM(dst, src2); 1355 } 1356 1357 @Override 1358 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1359 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null); 1360 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1361 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? 
W0 : W1, false); 1362 asm.emitByte(op); 1363 asm.emitOperandHelper(dst, src2, 0); 1364 } 1365 } 1366 1367 public static final class VexGeneralPurposeRMVOp extends VexOp { 1368 // @formatter:off 1369 public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1); 1370 public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1371 public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1372 public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1373 public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1374 // @formatter:on 1375 1376 private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1377 super(opcode, pp, mmmmm, w, op, assertion); 1378 } 1379 1380 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1381 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null); 1382 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1383 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1384 asm.emitByte(op); 1385 asm.emitModRM(dst, src1); 1386 } 1387 1388 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) { 1389 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null); 1390 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1391 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1392 asm.emitByte(op); 1393 asm.emitOperandHelper(dst, src1, 0); 1394 } 1395 } 1396 1397 public static final class VexGeneralPurposeRMOp extends VexRMOp { 1398 // @formatter:off 1399 public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1); 1400 public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1); 1401 public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1); 1402 // @formatter:on 1403 private final int ext; 1404 1405 private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) { 1406 super(opcode, pp, mmmmm, w, op, assertion); 1407 this.ext = ext; 1408 } 1409 1410 @Override 1411 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) { 1412 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1413 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1414 asm.emitByte(op); 1415 asm.emitModRM(ext, src); 1416 } 1417 1418 @Override 1419 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) { 1420 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1421 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1422 asm.emitByte(op); 1423 asm.emitOperandHelper(ext, src, 0); 1424 } 1425 } 1426 1427 /** 1428 * VEX-encoded shift instructions with an operand order of either RVM or VMI. 
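 * <p>
 * A usage sketch (the call sites are hypothetical): {@code VPSLLD.emit(asm, AVXSize.YMM, dst, src, count)}
 * emits the RVM form with the shift count in an XMM register, while
 * {@code VPSLLD.emit(asm, AVXSize.YMM, dst, src, 4)} emits the VMI form (here 0x72 with the /6
 * extension) with an imm8 shift count.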
1429 */ 1430 public static final class VexShiftOp extends VexRVMOp implements VexRRIOp { 1431 // @formatter:off 1432 public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2); 1433 public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2); 1434 public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2); 1435 public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4); 1436 public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4); 1437 public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6); 1438 public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6); 1439 public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6); 1440 // @formatter:on 1441 1442 private final int immOp; 1443 private final int r; 1444 1445 private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) { 1446 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2); 1447 this.immOp = immOp; 1448 this.r = r; 1449 } 1450 1451 @Override 1452 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { 1453 assert assertion.check((AMD64) asm.target.arch, size, null, dst, src); 1454 asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false); 1455 asm.emitByte(immOp); 1456 asm.emitModRM(r, src); 1457 asm.emitByte(imm8); 1458 } 1459 } 1460 1461 public static final class VexMaskMoveOp extends VexOp { 1462 // @formatter:off 1463 public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E); 1464 public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F); 1465 public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1466 public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1467 // @formatter:on 1468 1469 private final int opReverse; 1470 1471 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) { 1472 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1); 1473 } 1474 1475 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) { 1476 super(opcode, pp, mmmmm, w, op, assertion); 1477 this.opReverse = opReverse; 1478 } 1479 1480 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) { 1481 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null); 1482 asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false); 1483 asm.emitByte(op); 1484 asm.emitOperandHelper(dst, src, 0); 1485 } 1486 1487 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) { 1488 assert assertion.check((AMD64) asm.target.arch, size, src, mask, null); 1489 asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false); 1490 asm.emitByte(opReverse); 1491 asm.emitOperandHelper(src, dst, 0); 1492 } 1493 } 1494 1495 /** 1496 * VEX-encoded instructions with an operand order of RVMI. 
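 * <p>
 * A usage sketch (hypothetical call site): {@code VINSERTI128.emit(asm, AVXSize.YMM, dst, src1, src2, 1)}
 * builds dst from src1 with its upper 128-bit lane replaced by src2; the trailing imm8 selects
 * which lane is replaced.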
1497 */ 1498 public static final class VexRVMIOp extends VexOp { 1499 // @formatter:off 1500 public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6); 1501 public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6); 1502 public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY); 1503 public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY); 1504 // @formatter:on 1505 1506 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) { 1507 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1508 } 1509 1510 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1511 super(opcode, pp, mmmmm, w, op, assertion); 1512 } 1513 1514 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) { 1515 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1516 assert (imm8 & 0xFF) == imm8; 1517 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1518 asm.emitByte(op); 1519 asm.emitModRM(dst, src2); 1520 asm.emitByte(imm8); 1521 } 1522 1523 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) { 1524 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1525 assert (imm8 & 0xFF) == imm8; 1526 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1527 asm.emitByte(op); 1528 asm.emitOperandHelper(dst, src2, 1); 1529 asm.emitByte(imm8); 1530 } 1531 } 1532 1533 /** 1534 * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a 1535 * comparison operator. 
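 * <p>
 * A usage sketch (hypothetical call site): {@code VCMPSD.emit(asm, AVXSize.XMM, dst, x, y,
 * Predicate.getPredicate(Condition.GE, false))} emits a compare with the GE_OQ predicate
 * (imm8 0x1d), leaving an all-ones mask in the low 64 bits of dst when x >= y and zeros otherwise.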
1536 */ 1537 public static final class VexFloatCompareOp extends VexOp { 1538 // @formatter:off 1539 public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2); 1540 public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2); 1541 public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2); 1542 public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2); 1543 // @formatter:on 1544 1545 public enum Predicate { 1546 EQ_OQ(0x00), 1547 LT_OS(0x01), 1548 LE_OS(0x02), 1549 UNORD_Q(0x03), 1550 NEQ_UQ(0x04), 1551 NLT_US(0x05), 1552 NLE_US(0x06), 1553 ORD_Q(0x07), 1554 EQ_UQ(0x08), 1555 NGE_US(0x09), 1556 NGT_US(0x0a), 1557 FALSE_OQ(0x0b), 1558 NEQ_OQ(0x0c), 1559 GE_OS(0x0d), 1560 GT_OS(0x0e), 1561 TRUE_UQ(0x0f), 1562 EQ_OS(0x10), 1563 LT_OQ(0x11), 1564 LE_OQ(0x12), 1565 UNORD_S(0x13), 1566 NEQ_US(0x14), 1567 NLT_UQ(0x15), 1568 NLE_UQ(0x16), 1569 ORD_S(0x17), 1570 EQ_US(0x18), 1571 NGE_UQ(0x19), 1572 NGT_UQ(0x1a), 1573 FALSE_OS(0x1b), 1574 NEQ_OS(0x1c), 1575 GE_OQ(0x1d), 1576 GT_OQ(0x1e), 1577 TRUE_US(0x1f); 1578 1579 private int imm8; 1580 1581 Predicate(int imm8) { 1582 this.imm8 = imm8; 1583 } 1584 1585 public static Predicate getPredicate(Condition condition, boolean unorderedIsTrue) { 1586 if (unorderedIsTrue) { 1587 switch (condition) { 1588 case EQ: 1589 return EQ_UQ; 1590 case NE: 1591 return NEQ_UQ; 1592 case LT: 1593 return NGE_UQ; 1594 case LE: 1595 return NGT_UQ; 1596 case GT: 1597 return NLE_UQ; 1598 case GE: 1599 return NLT_UQ; 1600 default: 1601 throw GraalError.shouldNotReachHere(); 1602 } 1603 } else { 1604 switch (condition) { 1605 case EQ: 1606 return EQ_OQ; 1607 case NE: 1608 return NEQ_OQ; 1609 case LT: 1610 return LT_OQ; 1611 case LE: 1612 return LE_OQ; 1613 case GT: 1614 return GT_OQ; 1615 case GE: 1616 return GE_OQ; 1617 default: 1618 throw GraalError.shouldNotReachHere(); 1619 } 1620 } 1621 } 1622 } 1623 1624 private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) { 1625 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1626 } 1627 1628 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) { 1629 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1630 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1631 asm.emitByte(op); 1632 asm.emitModRM(dst, src2); 1633 asm.emitByte(p.imm8); 1634 } 1635 1636 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) { 1637 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1638 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1639 asm.emitByte(op); 1640 asm.emitOperandHelper(dst, src2, 1); 1641 asm.emitByte(p.imm8); 1642 } 1643 } 1644 1645 public final void addl(AMD64Address dst, int imm32) { 1646 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1647 } 1648 1649 public final void addl(Register dst, int imm32) { 1650 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1651 } 1652 1653 public final void addl(Register dst, Register src) { 1654 ADD.rmOp.emit(this, DWORD, dst, src); 1655 } 1656 1657 public final void addpd(Register dst, Register src) { 1658 SSEOp.ADD.emit(this, PD, dst, src); 1659 } 1660 1661 public final void addpd(Register dst, AMD64Address src) { 1662 SSEOp.ADD.emit(this, PD, dst, src); 1663 } 1664 1665 public final void
addsd(Register dst, Register src) { 1666 SSEOp.ADD.emit(this, SD, dst, src); 1667 } 1668 1669 public final void addsd(Register dst, AMD64Address src) { 1670 SSEOp.ADD.emit(this, SD, dst, src); 1671 } 1672 1673 private void addrNop4() { 1674 // 4 bytes: NOP DWORD PTR [EAX+0] 1675 emitByte(0x0F); 1676 emitByte(0x1F); 1677 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc); 1678 emitByte(0); // 8-bits offset (1 byte) 1679 } 1680 1681 private void addrNop5() { 1682 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 1683 emitByte(0x0F); 1684 emitByte(0x1F); 1685 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); 1686 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1687 emitByte(0); // 8-bits offset (1 byte) 1688 } 1689 1690 private void addrNop7() { 1691 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 1692 emitByte(0x0F); 1693 emitByte(0x1F); 1694 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); 1695 emitInt(0); // 32-bits offset (4 bytes) 1696 } 1697 1698 private void addrNop8() { 1699 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 1700 emitByte(0x0F); 1701 emitByte(0x1F); 1702 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); 1703 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1704 emitInt(0); // 32-bits offset (4 bytes) 1705 } 1706 1707 public final void andl(Register dst, int imm32) { 1708 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1709 } 1710 1711 public final void andl(Register dst, Register src) { 1712 AND.rmOp.emit(this, DWORD, dst, src); 1713 } 1714 1715 public final void andpd(Register dst, Register src) { 1716 SSEOp.AND.emit(this, PD, dst, src); 1717 } 1718 1719 public final void andpd(Register dst, AMD64Address src) { 1720 SSEOp.AND.emit(this, PD, dst, src); 1721 } 1722 1723 public final void bsfq(Register dst, Register src) { 1724 prefixq(dst, src); 1725 emitByte(0x0F); 1726 emitByte(0xBC); 1727 emitModRM(dst, src); 1728 } 1729 1730 public final void bsrl(Register dst, Register src) { 1731 prefix(dst, src); 1732 emitByte(0x0F); 1733 emitByte(0xBD); 1734 emitModRM(dst, src); 1735 } 1736 1737 public final void bswapl(Register reg) { 1738 prefix(reg); 1739 emitByte(0x0F); 1740 emitModRM(1, reg); 1741 } 1742 1743 public final void cdql() { 1744 emitByte(0x99); 1745 } 1746 1747 public final void cmovl(ConditionFlag cc, Register dst, Register src) { 1748 prefix(dst, src); 1749 emitByte(0x0F); 1750 emitByte(0x40 | cc.getValue()); 1751 emitModRM(dst, src); 1752 } 1753 1754 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) { 1755 prefix(src, dst); 1756 emitByte(0x0F); 1757 emitByte(0x40 | cc.getValue()); 1758 emitOperandHelper(dst, src, 0); 1759 } 1760 1761 public final void cmpb(Register dst, Register src) { 1762 CMP.byteRmOp.emit(this, BYTE, dst, src); 1763 } 1764 1765 public final void cmpw(Register dst, Register src) { 1766 CMP.rmOp.emit(this, WORD, dst, src); 1767 } 1768 1769 public final void cmpl(Register dst, int imm32) { 1770 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1771 } 1772 1773 public final void cmpl(Register dst, Register src) { 1774 CMP.rmOp.emit(this, DWORD, dst, src); 1775 } 1776 1777 public final void cmpl(Register dst, AMD64Address src) { 1778 CMP.rmOp.emit(this, DWORD, dst, src); 1779 } 1780 1781 public final void cmpl(AMD64Address dst, int imm32) { 1782 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1783 } 1784 1785 /** 1786 * The 8-bit cmpxchg compares the value at adr with the contents of X86.rax, and stores reg into 1787 * adr if so; 
otherwise, the value at adr is loaded into X86.rax. The ZF is set if the compared 1788 * values were equal, and cleared otherwise. 1789 */ 1790 public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg 1791 prefixb(adr, reg); 1792 emitByte(0x0F); 1793 emitByte(0xB0); 1794 emitOperandHelper(reg, adr, 0); 1795 } 1796 1797 /** 1798 * The 16-bit cmpxchg compares the value at adr with the contents of X86.rax, and stores reg 1799 * into adr if they are equal; otherwise, the value at adr is loaded into X86.rax. The ZF is set if the 1800 * compared values were equal, and cleared otherwise. 1801 */ 1802 public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg 1803 emitByte(0x66); // Switch to 16-bit mode. 1804 prefix(adr, reg); 1805 emitByte(0x0F); 1806 emitByte(0xB1); 1807 emitOperandHelper(reg, adr, 0); 1808 } 1809 1810 /** 1811 * The 32-bit cmpxchg compares the value at adr with the contents of X86.rax, and stores reg 1812 * into adr if they are equal; otherwise, the value at adr is loaded into X86.rax. The ZF is set if the 1813 * compared values were equal, and cleared otherwise. 1814 */ 1815 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg 1816 prefix(adr, reg); 1817 emitByte(0x0F); 1818 emitByte(0xB1); 1819 emitOperandHelper(reg, adr, 0); 1820 } 1821 1822 public final void cvtsi2sdl(Register dst, Register src) { 1823 SSEOp.CVTSI2SD.emit(this, DWORD, dst, src); 1824 } 1825 1826 public final void cvttsd2sil(Register dst, Register src) { 1827 SSEOp.CVTTSD2SI.emit(this, DWORD, dst, src); 1828 } 1829 1830 public final void decl(AMD64Address dst) { 1831 prefix(dst); 1832 emitByte(0xFF); 1833 emitOperandHelper(1, dst, 0); 1834 } 1835 1836 public final void divsd(Register dst, Register src) { 1837 SSEOp.DIV.emit(this, SD, dst, src); 1838 } 1839 1840 public final void hlt() { 1841 emitByte(0xF4); 1842 } 1843 1844 public final void imull(Register dst, Register src, int value) { 1845 if (isByte(value)) { 1846 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value); 1847 } else { 1848 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value); 1849 } 1850 } 1851 1852 public final void incl(AMD64Address dst) { 1853 prefix(dst); 1854 emitByte(0xFF); 1855 emitOperandHelper(0, dst, 0); 1856 } 1857 1858 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { 1859 int shortSize = 2; 1860 int longSize = 6; 1861 long disp = jumpTarget - position(); 1862 if (!forceDisp32 && isByte(disp - shortSize)) { 1863 // 0111 tttn #8-bit disp 1864 emitByte(0x70 | cc.getValue()); 1865 emitByte((int) ((disp - shortSize) & 0xFF)); 1866 } else { 1867 // 0000 1111 1000 tttn #32-bit disp 1868 assert isInt(disp - longSize) : "must be 32bit offset (call4)"; 1869 emitByte(0x0F); 1870 emitByte(0x80 | cc.getValue()); 1871 emitInt((int) (disp - longSize)); 1872 } 1873 } 1874 1875 public final void jcc(ConditionFlag cc, Label l) { 1876 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; 1877 if (l.isBound()) { 1878 jcc(cc, l.position(), false); 1879 } else { 1880 // Note: could eliminate cond. jumps to this jump if condition 1881 // is the same; however, that seems to be a rather unlikely case.
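// A sketch of this unbound-label case (hypothetical usage): the call below records a
// patch site and emits the long form with a zero placeholder displacement, which
// patchJumpTarget() overwrites once the label is bound, e.g.
//   Label done = new Label();
//   asm.jcc(ConditionFlag.Zero, done); // emits 0F 84 00 00 00 00 for now
//   asm.bind(done);                    // binding patches the 32-bit displacement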
1882 // Note: use jccb() if the label to be bound is very close, to get 1883 // an 8-bit displacement 1884 l.addPatchAt(position(), this); 1885 emitByte(0x0F); 1886 emitByte(0x80 | cc.getValue()); 1887 emitInt(0); 1888 } 1889 1890 } 1891 1892 public final void jccb(ConditionFlag cc, Label l) { 1893 if (l.isBound()) { 1894 int shortSize = 2; 1895 int entry = l.position(); 1896 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp"; 1897 long disp = entry - position(); 1898 // 0111 tttn #8-bit disp 1899 emitByte(0x70 | cc.getValue()); 1900 emitByte((int) ((disp - shortSize) & 0xFF)); 1901 } else { 1902 l.addPatchAt(position(), this); 1903 emitByte(0x70 | cc.getValue()); 1904 emitByte(0); 1905 } 1906 } 1907 1908 public final void jmp(int jumpTarget, boolean forceDisp32) { 1909 int shortSize = 2; 1910 int longSize = 5; 1911 long disp = jumpTarget - position(); 1912 if (!forceDisp32 && isByte(disp - shortSize)) { 1913 emitByte(0xEB); 1914 emitByte((int) ((disp - shortSize) & 0xFF)); 1915 } else { 1916 emitByte(0xE9); 1917 emitInt((int) (disp - longSize)); 1918 } 1919 } 1920 1921 @Override 1922 public final void jmp(Label l) { 1923 if (l.isBound()) { 1924 jmp(l.position(), false); 1925 } else { 1926 // By default, forward jumps are always 32-bit displacements, since 1927 // we can't yet know where the label will be bound. If you're sure that 1928 // the forward jump will not run beyond 256 bytes, use jmpb to 1929 // force an 8-bit displacement. 1930 1931 l.addPatchAt(position(), this); 1932 emitByte(0xE9); 1933 emitInt(0); 1934 } 1935 } 1936 1937 public final void jmp(Register entry) { 1938 prefix(entry); 1939 emitByte(0xFF); 1940 emitModRM(4, entry); 1941 } 1942 1943 public final void jmp(AMD64Address adr) { 1944 prefix(adr); 1945 emitByte(0xFF); 1946 emitOperandHelper(AMD64.rsp, adr, 0); 1947 } 1948 1949 public final void jmpb(Label l) { 1950 if (l.isBound()) { 1951 int shortSize = 2; 1952 // Displacement is relative to byte just after jmpb instruction 1953 int displacement = l.position() - position() - shortSize; 1954 GraalError.guarantee(isByte(displacement), "Displacement too large to be encoded as a byte: %d", displacement); 1955 emitByte(0xEB); 1956 emitByte(displacement & 0xFF); 1957 } else { 1958 l.addPatchAt(position(), this); 1959 emitByte(0xEB); 1960 emitByte(0); 1961 } 1962 } 1963 1964 public final void lead(Register dst, AMD64Address src) { 1965 prefix(src, dst); 1966 emitByte(0x8D); 1967 emitOperandHelper(dst, src, 0); 1968 } 1969 1970 public final void leaq(Register dst, AMD64Address src) { 1971 prefixq(src, dst); 1972 emitByte(0x8D); 1973 emitOperandHelper(dst, src, 0); 1974 } 1975 1976 public final void leave() { 1977 emitByte(0xC9); 1978 } 1979 1980 public final void lock() { 1981 emitByte(0xF0); 1982 } 1983 1984 public final void movapd(Register dst, Register src) { 1985 assert inRC(XMM, dst) && inRC(XMM, src); 1986 simdPrefix(dst, Register.None, src, PD, P_0F, false); 1987 emitByte(0x28); 1988 emitModRM(dst, src); 1989 } 1990 1991 public final void movaps(Register dst, Register src) { 1992 assert inRC(XMM, dst) && inRC(XMM, src); 1993 simdPrefix(dst, Register.None, src, PS, P_0F, false); 1994 emitByte(0x28); 1995 emitModRM(dst, src); 1996 } 1997 1998 public final void movb(AMD64Address dst, int imm8) { 1999 prefix(dst); 2000 emitByte(0xC6); 2001 emitOperandHelper(0, dst, 1); 2002 emitByte(imm8); 2003 } 2004 2005 public final void movb(AMD64Address dst, Register src) { 2006 assert inRC(CPU, src) : "must have byte register"; 2007 prefixb(dst,
src); 2008 emitByte(0x88); 2009 emitOperandHelper(src, dst, 0); 2010 } 2011 2012 public final void movl(Register dst, int imm32) { 2013 movl(dst, imm32, false); 2014 } 2015 2016 public final void movl(Register dst, int imm32, boolean annotateImm) { 2017 int insnPos = position(); 2018 prefix(dst); 2019 emitByte(0xB8 + encode(dst)); 2020 int immPos = position(); 2021 emitInt(imm32); 2022 int nextInsnPos = position(); 2023 if (annotateImm && codePatchingAnnotationConsumer != null) { 2024 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 2025 } 2026 } 2027 2028 public final void movl(Register dst, Register src) { 2029 prefix(dst, src); 2030 emitByte(0x8B); 2031 emitModRM(dst, src); 2032 } 2033 2034 public final void movl(Register dst, AMD64Address src) { 2035 prefix(src, dst); 2036 emitByte(0x8B); 2037 emitOperandHelper(dst, src, 0); 2038 } 2039 2040 /** 2041 * @param wide use 4 byte encoding for displacements that would normally fit in a byte 2042 */ 2043 public final void movl(Register dst, AMD64Address src, boolean wide) { 2044 prefix(src, dst); 2045 emitByte(0x8B); 2046 emitOperandHelper(dst, src, wide, 0); 2047 } 2048 2049 public final void movl(AMD64Address dst, int imm32) { 2050 prefix(dst); 2051 emitByte(0xC7); 2052 emitOperandHelper(0, dst, 4); 2053 emitInt(imm32); 2054 } 2055 2056 public final void movl(AMD64Address dst, Register src) { 2057 prefix(dst, src); 2058 emitByte(0x89); 2059 emitOperandHelper(src, dst, 0); 2060 } 2061 2062 /** 2063 * New CPUs require use of movsd and movss to avoid partial register stall when loading from 2064 * memory. But for old Opteron use movlpd instead of movsd. The selection is done in 2065 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and 2066 * {@link AMD64MacroAssembler#movflt(Register, Register)}. 2067 */ 2068 public final void movlpd(Register dst, AMD64Address src) { 2069 assert inRC(XMM, dst); 2070 simdPrefix(dst, dst, src, PD, P_0F, false); 2071 emitByte(0x12); 2072 emitOperandHelper(dst, src, 0); 2073 } 2074 2075 public final void movlhps(Register dst, Register src) { 2076 assert inRC(XMM, dst) && inRC(XMM, src); 2077 simdPrefix(dst, src, src, PS, P_0F, false); 2078 emitByte(0x16); 2079 emitModRM(dst, src); 2080 } 2081 2082 public final void movq(Register dst, AMD64Address src) { 2083 movq(dst, src, false); 2084 } 2085 2086 public final void movq(Register dst, AMD64Address src, boolean force4BytesDisplacement) { 2087 if (inRC(XMM, dst)) { 2088 // Insn: MOVQ xmm, r/m64 2089 // Code: F3 0F 7E /r 2090 // An alternative instruction would be 66 REX.W 0F 6E /r. We prefer the REX.W free 2091 // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction 2092 // when applicable. 2093 simdPrefix(dst, Register.None, src, SS, P_0F, false); 2094 emitByte(0x7E); 2095 emitOperandHelper(dst, src, force4BytesDisplacement, 0); 2096 } else { 2097 // gpr version of movq 2098 prefixq(src, dst); 2099 emitByte(0x8B); 2100 emitOperandHelper(dst, src, force4BytesDisplacement, 0); 2101 } 2102 } 2103 2104 public final void movq(Register dst, Register src) { 2105 assert inRC(CPU, dst) && inRC(CPU, src); 2106 prefixq(dst, src); 2107 emitByte(0x8B); 2108 emitModRM(dst, src); 2109 } 2110 2111 public final void movq(AMD64Address dst, Register src) { 2112 if (inRC(XMM, src)) { 2113 // Insn: MOVQ r/m64, xmm 2114 // Code: 66 0F D6 /r 2115 // An alternative instruction would be 66 REX.W 0F 7E /r. 
We prefer the REX.W free 2116 // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction 2117 // when applicable. 2118 simdPrefix(src, Register.None, dst, PD, P_0F, false); 2119 emitByte(0xD6); 2120 emitOperandHelper(src, dst, 0); 2121 } else { 2122 // gpr version of movq 2123 prefixq(dst, src); 2124 emitByte(0x89); 2125 emitOperandHelper(src, dst, 0); 2126 } 2127 } 2128 2129 public final void movsbl(Register dst, AMD64Address src) { 2130 prefix(src, dst); 2131 emitByte(0x0F); 2132 emitByte(0xBE); 2133 emitOperandHelper(dst, src, 0); 2134 } 2135 2136 public final void movsbl(Register dst, Register src) { 2137 prefix(dst, false, src, true); 2138 emitByte(0x0F); 2139 emitByte(0xBE); 2140 emitModRM(dst, src); 2141 } 2142 2143 public final void movsbq(Register dst, AMD64Address src) { 2144 prefixq(src, dst); 2145 emitByte(0x0F); 2146 emitByte(0xBE); 2147 emitOperandHelper(dst, src, 0); 2148 } 2149 2150 public final void movsbq(Register dst, Register src) { 2151 prefixq(dst, src); 2152 emitByte(0x0F); 2153 emitByte(0xBE); 2154 emitModRM(dst, src); 2155 } 2156 2157 public final void movsd(Register dst, Register src) { 2158 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2159 } 2160 2161 public final void movsd(Register dst, AMD64Address src) { 2162 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2163 } 2164 2165 public final void movsd(AMD64Address dst, Register src) { 2166 AMD64MROp.MOVSD.emit(this, SD, dst, src); 2167 } 2168 2169 public final void movss(Register dst, Register src) { 2170 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2171 } 2172 2173 public final void movss(Register dst, AMD64Address src) { 2174 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2175 } 2176 2177 public final void movss(AMD64Address dst, Register src) { 2178 AMD64MROp.MOVSS.emit(this, SS, dst, src); 2179 } 2180 2181 public final void mulpd(Register dst, Register src) { 2182 SSEOp.MUL.emit(this, PD, dst, src); 2183 } 2184 2185 public final void mulpd(Register dst, AMD64Address src) { 2186 SSEOp.MUL.emit(this, PD, dst, src); 2187 } 2188 2189 public final void mulsd(Register dst, Register src) { 2190 SSEOp.MUL.emit(this, SD, dst, src); 2191 } 2192 2193 public final void mulsd(Register dst, AMD64Address src) { 2194 SSEOp.MUL.emit(this, SD, dst, src); 2195 } 2196 2197 public final void mulss(Register dst, Register src) { 2198 SSEOp.MUL.emit(this, SS, dst, src); 2199 } 2200 2201 public final void movswl(Register dst, AMD64Address src) { 2202 AMD64RMOp.MOVSX.emit(this, DWORD, dst, src); 2203 } 2204 2205 public final void movswq(Register dst, AMD64Address src) { 2206 AMD64RMOp.MOVSX.emit(this, QWORD, dst, src); 2207 } 2208 2209 public final void movw(AMD64Address dst, int imm16) { 2210 emitByte(0x66); // switch to 16-bit mode 2211 prefix(dst); 2212 emitByte(0xC7); 2213 emitOperandHelper(0, dst, 2); 2214 emitShort(imm16); 2215 } 2216 2217 public final void movw(AMD64Address dst, Register src) { 2218 emitByte(0x66); 2219 prefix(dst, src); 2220 emitByte(0x89); 2221 emitOperandHelper(src, dst, 0); 2222 } 2223 2224 public final void movw(Register dst, AMD64Address src) { 2225 emitByte(0x66); 2226 prefix(src, dst); 2227 emitByte(0x8B); 2228 emitOperandHelper(dst, src, 0); 2229 } 2230 2231 public final void movzbl(Register dst, AMD64Address src) { 2232 prefix(src, dst); 2233 emitByte(0x0F); 2234 emitByte(0xB6); 2235 emitOperandHelper(dst, src, 0); 2236 } 2237 2238 public final void movzbl(Register dst, Register src) { 2239 AMD64RMOp.MOVZXB.emit(this, DWORD, dst, src); 2240 } 2241 2242 public final void movzbq(Register dst, 
Register src) { 2243 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src); 2244 } 2245 2246 public final void movzbq(Register dst, AMD64Address src) { 2247 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src); 2248 } 2249 2250 public final void movzwl(Register dst, AMD64Address src) { 2251 AMD64RMOp.MOVZX.emit(this, DWORD, dst, src); 2252 } 2253 2254 public final void movzwq(Register dst, AMD64Address src) { 2255 AMD64RMOp.MOVZX.emit(this, QWORD, dst, src); 2256 } 2257 2258 public final void negl(Register dst) { 2259 NEG.emit(this, DWORD, dst); 2260 } 2261 2262 public final void notl(Register dst) { 2263 NOT.emit(this, DWORD, dst); 2264 } 2265 2266 public final void notq(Register dst) { 2267 NOT.emit(this, QWORD, dst); 2268 } 2269 2270 @Override 2271 public final void ensureUniquePC() { 2272 nop(); 2273 } 2274 2275 public final void nop() { 2276 nop(1); 2277 } 2278 2279 public void nop(int count) { 2280 int i = count; 2281 if (UseNormalNop) { 2282 assert i > 0 : " "; 2283 // The fancy nops aren't currently recognized by debuggers making it a 2284 // pain to disassemble code while debugging. If assert are on clearly 2285 // speed is not an issue so simply use the single byte traditional nop 2286 // to do alignment. 2287 2288 for (; i > 0; i--) { 2289 emitByte(0x90); 2290 } 2291 return; 2292 } 2293 2294 if (UseAddressNop) { 2295 // 2296 // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD. 2297 // 1: 0x90 2298 // 2: 0x66 0x90 2299 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2300 // 4: 0x0F 0x1F 0x40 0x00 2301 // 5: 0x0F 0x1F 0x44 0x00 0x00 2302 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2303 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2304 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2305 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2306 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2307 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2308 2309 // The rest coding is AMD specific - use consecutive Address nops 2310 2311 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2312 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2313 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2314 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2315 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2316 // Size prefixes (0x66) are added for larger sizes 2317 2318 while (i >= 22) { 2319 i -= 11; 2320 emitByte(0x66); // size prefix 2321 emitByte(0x66); // size prefix 2322 emitByte(0x66); // size prefix 2323 addrNop8(); 2324 } 2325 // Generate first nop for size between 21-12 2326 switch (i) { 2327 case 21: 2328 i -= 11; 2329 emitByte(0x66); // size prefix 2330 emitByte(0x66); // size prefix 2331 emitByte(0x66); // size prefix 2332 addrNop8(); 2333 break; 2334 case 20: 2335 case 19: 2336 i -= 10; 2337 emitByte(0x66); // size prefix 2338 emitByte(0x66); // size prefix 2339 addrNop8(); 2340 break; 2341 case 18: 2342 case 17: 2343 i -= 9; 2344 emitByte(0x66); // size prefix 2345 addrNop8(); 2346 break; 2347 case 16: 2348 case 15: 2349 i -= 8; 2350 addrNop8(); 2351 break; 2352 case 14: 2353 case 13: 2354 i -= 7; 2355 addrNop7(); 2356 break; 2357 case 12: 2358 i -= 6; 2359 emitByte(0x66); // size prefix 2360 addrNop5(); 2361 break; 2362 default: 2363 assert i < 12; 2364 } 2365 2366 // Generate second nop for size between 11-1 2367 switch (i) { 2368 case 11: 2369 emitByte(0x66); // size prefix 2370 emitByte(0x66); // size prefix 2371 
emitByte(0x66); // size prefix 2372 addrNop8(); 2373 break; 2374 case 10: 2375 emitByte(0x66); // size prefix 2376 emitByte(0x66); // size prefix 2377 addrNop8(); 2378 break; 2379 case 9: 2380 emitByte(0x66); // size prefix 2381 addrNop8(); 2382 break; 2383 case 8: 2384 addrNop8(); 2385 break; 2386 case 7: 2387 addrNop7(); 2388 break; 2389 case 6: 2390 emitByte(0x66); // size prefix 2391 addrNop5(); 2392 break; 2393 case 5: 2394 addrNop5(); 2395 break; 2396 case 4: 2397 addrNop4(); 2398 break; 2399 case 3: 2400 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2401 emitByte(0x66); // size prefix 2402 emitByte(0x66); // size prefix 2403 emitByte(0x90); // nop 2404 break; 2405 case 2: 2406 emitByte(0x66); // size prefix 2407 emitByte(0x90); // nop 2408 break; 2409 case 1: 2410 emitByte(0x90); // nop 2411 break; 2412 default: 2413 assert i == 0; 2414 } 2415 return; 2416 } 2417 2418 // Using nops with size prefixes "0x66 0x90". 2419 // From AMD Optimization Guide: 2420 // 1: 0x90 2421 // 2: 0x66 0x90 2422 // 3: 0x66 0x66 0x90 2423 // 4: 0x66 0x66 0x66 0x90 2424 // 5: 0x66 0x66 0x90 0x66 0x90 2425 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2426 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2427 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2428 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2429 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2430 // 2431 while (i > 12) { 2432 i -= 4; 2433 emitByte(0x66); // size prefix 2434 emitByte(0x66); 2435 emitByte(0x66); 2436 emitByte(0x90); // nop 2437 } 2438 // 1 - 12 nops 2439 if (i > 8) { 2440 if (i > 9) { 2441 i -= 1; 2442 emitByte(0x66); 2443 } 2444 i -= 3; 2445 emitByte(0x66); 2446 emitByte(0x66); 2447 emitByte(0x90); 2448 } 2449 // 1 - 8 nops 2450 if (i > 4) { 2451 if (i > 6) { 2452 i -= 1; 2453 emitByte(0x66); 2454 } 2455 i -= 3; 2456 emitByte(0x66); 2457 emitByte(0x66); 2458 emitByte(0x90); 2459 } 2460 switch (i) { 2461 case 4: 2462 emitByte(0x66); 2463 emitByte(0x66); 2464 emitByte(0x66); 2465 emitByte(0x90); 2466 break; 2467 case 3: 2468 emitByte(0x66); 2469 emitByte(0x66); 2470 emitByte(0x90); 2471 break; 2472 case 2: 2473 emitByte(0x66); 2474 emitByte(0x90); 2475 break; 2476 case 1: 2477 emitByte(0x90); 2478 break; 2479 default: 2480 assert i == 0; 2481 } 2482 } 2483 2484 public final void orl(Register dst, Register src) { 2485 OR.rmOp.emit(this, DWORD, dst, src); 2486 } 2487 2488 public final void orl(Register dst, int imm32) { 2489 OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2490 } 2491 2492 // Insn: VPACKUSWB xmm1, xmm2, xmm3/m128 2493 // ----- 2494 // Insn: VPACKUSWB xmm1, xmm1, xmm2 2495 2496 public final void packuswb(Register dst, Register src) { 2497 assert inRC(XMM, dst) && inRC(XMM, src); 2498 // Code: VEX.NDS.128.66.0F.WIG 67 /r 2499 simdPrefix(dst, dst, src, PD, P_0F, false); 2500 emitByte(0x67); 2501 emitModRM(dst, src); 2502 } 2503 2504 public final void pop(Register dst) { 2505 prefix(dst); 2506 emitByte(0x58 + encode(dst)); 2507 } 2508 2509 public void popfq() { 2510 emitByte(0x9D); 2511 } 2512 2513 public final void ptest(Register dst, Register src) { 2514 assert supports(CPUFeature.SSE4_1); 2515 assert inRC(XMM, dst) && inRC(XMM, src); 2516 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2517 emitByte(0x17); 2518 emitModRM(dst, src); 2519 } 2520 2521 public final void pcmpeqb(Register dst, Register src) { 2522 assert supports(CPUFeature.SSE2); 2523 assert inRC(XMM, dst) && inRC(XMM, src); 2524 simdPrefix(dst, dst, src, PD, P_0F, false); 2525 emitByte(0x74); 2526 emitModRM(dst, 
src); 2527 } 2528 2529 public final void pcmpeqw(Register dst, Register src) { 2530 assert supports(CPUFeature.SSE2); 2531 assert inRC(XMM, dst) && inRC(XMM, src); 2532 simdPrefix(dst, dst, src, PD, P_0F, false); 2533 emitByte(0x75); 2534 emitModRM(dst, src); 2535 } 2536 2537 public final void pcmpeqd(Register dst, Register src) { 2538 assert supports(CPUFeature.SSE2); 2539 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2540 simdPrefix(dst, dst, src, PD, P_0F, false); 2541 emitByte(0x76); 2542 emitModRM(dst, src); 2543 } 2544 2545 public final void pcmpestri(Register dst, AMD64Address src, int imm8) { 2546 assert supports(CPUFeature.SSE4_2); 2547 assert inRC(XMM, dst); 2548 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2549 emitByte(0x61); 2550 emitOperandHelper(dst, src, 0); 2551 emitByte(imm8); 2552 } 2553 2554 public final void pcmpestri(Register dst, Register src, int imm8) { 2555 assert supports(CPUFeature.SSE4_2); 2556 assert inRC(XMM, dst) && inRC(XMM, src); 2557 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2558 emitByte(0x61); 2559 emitModRM(dst, src); 2560 emitByte(imm8); 2561 } 2562 2563 public final void pmovmskb(Register dst, Register src) { 2564 assert supports(CPUFeature.SSE2); 2565 assert inRC(CPU, dst) && inRC(XMM, src); 2566 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2567 emitByte(0xD7); 2568 emitModRM(dst, src); 2569 } 2570 2571 private void pmovSZx(Register dst, AMD64Address src, int op) { 2572 assert supports(CPUFeature.SSE4_1); 2573 assert inRC(XMM, dst); 2574 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2575 emitByte(op); 2576 emitOperandHelper(dst, src, 0); 2577 } 2578 2579 public final void pmovsxbw(Register dst, AMD64Address src) { 2580 pmovSZx(dst, src, 0x20); 2581 } 2582 2583 public final void pmovsxbd(Register dst, AMD64Address src) { 2584 pmovSZx(dst, src, 0x21); 2585 } 2586 2587 public final void pmovsxbq(Register dst, AMD64Address src) { 2588 pmovSZx(dst, src, 0x22); 2589 } 2590 2591 public final void pmovsxwd(Register dst, AMD64Address src) { 2592 pmovSZx(dst, src, 0x23); 2593 } 2594 2595 public final void pmovsxwq(Register dst, AMD64Address src) { 2596 pmovSZx(dst, src, 0x24); 2597 } 2598 2599 public final void pmovsxdq(Register dst, AMD64Address src) { 2600 pmovSZx(dst, src, 0x25); 2601 } 2602 2603 // Insn: VPMOVZXBW xmm1, xmm2/m64 2604 public final void pmovzxbw(Register dst, AMD64Address src) { 2605 pmovSZx(dst, src, 0x30); 2606 } 2607 2608 public final void pmovzxbd(Register dst, AMD64Address src) { 2609 pmovSZx(dst, src, 0x31); 2610 } 2611 2612 public final void pmovzxbq(Register dst, AMD64Address src) { 2613 pmovSZx(dst, src, 0x32); 2614 } 2615 2616 public final void pmovzxwd(Register dst, AMD64Address src) { 2617 pmovSZx(dst, src, 0x33); 2618 } 2619 2620 public final void pmovzxwq(Register dst, AMD64Address src) { 2621 pmovSZx(dst, src, 0x34); 2622 } 2623 2624 public final void pmovzxdq(Register dst, AMD64Address src) { 2625 pmovSZx(dst, src, 0x35); 2626 } 2627 2628 public final void pmovzxbw(Register dst, Register src) { 2629 assert supports(CPUFeature.SSE4_1); 2630 assert inRC(XMM, dst) && inRC(XMM, src); 2631 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2632 emitByte(0x30); 2633 emitModRM(dst, src); 2634 } 2635 2636 public final void push(Register src) { 2637 prefix(src); 2638 emitByte(0x50 + encode(src)); 2639 } 2640 2641 public void pushfq() { 2642 emitByte(0x9c); 2643 } 2644 2645 public final void paddd(Register dst, Register src) { 2646 assert 
inRC(XMM, dst) && inRC(XMM, src); 2647 simdPrefix(dst, dst, src, PD, P_0F, false); 2648 emitByte(0xFE); 2649 emitModRM(dst, src); 2650 } 2651 2652 public final void paddq(Register dst, Register src) { 2653 assert inRC(XMM, dst) && inRC(XMM, src); 2654 simdPrefix(dst, dst, src, PD, P_0F, false); 2655 emitByte(0xD4); 2656 emitModRM(dst, src); 2657 } 2658 2659 public final void pextrw(Register dst, Register src, int imm8) { 2660 assert inRC(CPU, dst) && inRC(XMM, src); 2661 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2662 emitByte(0xC5); 2663 emitModRM(dst, src); 2664 emitByte(imm8); 2665 } 2666 2667 public final void pinsrw(Register dst, Register src, int imm8) { 2668 assert inRC(XMM, dst) && inRC(CPU, src); 2669 simdPrefix(dst, dst, src, PD, P_0F, false); 2670 emitByte(0xC4); 2671 emitModRM(dst, src); 2672 emitByte(imm8); 2673 } 2674 2675 public final void por(Register dst, Register src) { 2676 assert inRC(XMM, dst) && inRC(XMM, src); 2677 simdPrefix(dst, dst, src, PD, P_0F, false); 2678 emitByte(0xEB); 2679 emitModRM(dst, src); 2680 } 2681 2682 public final void pand(Register dst, Register src) { 2683 assert inRC(XMM, dst) && inRC(XMM, src); 2684 simdPrefix(dst, dst, src, PD, P_0F, false); 2685 emitByte(0xDB); 2686 emitModRM(dst, src); 2687 } 2688 2689 public final void pxor(Register dst, Register src) { 2690 assert inRC(XMM, dst) && inRC(XMM, src); 2691 simdPrefix(dst, dst, src, PD, P_0F, false); 2692 emitByte(0xEF); 2693 emitModRM(dst, src); 2694 } 2695 2696 public final void pslld(Register dst, int imm8) { 2697 assert isUByte(imm8) : "invalid value"; 2698 assert inRC(XMM, dst); 2699 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 2700 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2701 emitByte(0x72); 2702 emitModRM(6, dst); 2703 emitByte(imm8 & 0xFF); 2704 } 2705 2706 public final void psllq(Register dst, Register shift) { 2707 assert inRC(XMM, dst) && inRC(XMM, shift); 2708 simdPrefix(dst, dst, shift, PD, P_0F, false); 2709 emitByte(0xF3); 2710 emitModRM(dst, shift); 2711 } 2712 2713 public final void psllq(Register dst, int imm8) { 2714 assert isUByte(imm8) : "invalid value"; 2715 assert inRC(XMM, dst); 2716 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 2717 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2718 emitByte(0x73); 2719 emitModRM(6, dst); 2720 emitByte(imm8); 2721 } 2722 2723 public final void psrad(Register dst, int imm8) { 2724 assert isUByte(imm8) : "invalid value"; 2725 assert inRC(XMM, dst); 2726 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 2727 simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false); 2728 emitByte(0x72); 2729 emitModRM(4, dst); 2730 emitByte(imm8); 2731 } 2732 2733 public final void psrld(Register dst, int imm8) { 2734 assert isUByte(imm8) : "invalid value"; 2735 assert inRC(XMM, dst); 2736 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 2737 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2738 emitByte(0x72); 2739 emitModRM(2, dst); 2740 emitByte(imm8); 2741 } 2742 2743 public final void psrlq(Register dst, int imm8) { 2744 assert isUByte(imm8) : "invalid value"; 2745 assert inRC(XMM, dst); 2746 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 2747 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2748 emitByte(0x73); 2749 emitModRM(2, dst); 2750 emitByte(imm8); 2751 } 2752 2753 public final void psrldq(Register dst, int imm8) { 2754 assert isUByte(imm8) : "invalid value"; 2755 assert inRC(XMM, dst); 2756 simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false); 2757 emitByte(0x73); 2758 emitModRM(3, dst); 2759 emitByte(imm8); 2760 } 2761 2762 public final void 
pshufb(Register dst, Register src) { 2763 assert supports(CPUFeature.SSSE3); 2764 assert inRC(XMM, dst) && inRC(XMM, src); 2765 simdPrefix(dst, dst, src, PD, P_0F38, false); 2766 emitByte(0x00); 2767 emitModRM(dst, src); 2768 } 2769 2770 public final void pshuflw(Register dst, Register src, int imm8) { 2771 assert supports(CPUFeature.SSE2); 2772 assert isUByte(imm8) : "invalid value"; 2773 assert inRC(XMM, dst) && inRC(XMM, src); 2774 simdPrefix(dst, Register.None, src, SD, P_0F, false); 2775 emitByte(0x70); 2776 emitModRM(dst, src); 2777 emitByte(imm8); 2778 } 2779 2780 public final void pshufd(Register dst, Register src, int imm8) { 2781 assert isUByte(imm8) : "invalid value"; 2782 assert inRC(XMM, dst) && inRC(XMM, src); 2783 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2784 emitByte(0x70); 2785 emitModRM(dst, src); 2786 emitByte(imm8); 2787 } 2788 2789 public final void psubd(Register dst, Register src) { 2790 assert inRC(XMM, dst) && inRC(XMM, src); 2791 simdPrefix(dst, dst, src, PD, P_0F, false); 2792 emitByte(0xFA); 2793 emitModRM(dst, src); 2794 } 2795 2796 public final void punpcklbw(Register dst, Register src) { 2797 assert supports(CPUFeature.SSE2); 2798 assert inRC(XMM, dst) && inRC(XMM, src); 2799 simdPrefix(dst, dst, src, PD, P_0F, false); 2800 emitByte(0x60); 2801 emitModRM(dst, src); 2802 } 2803 2804 public final void rcpps(Register dst, Register src) { 2805 assert inRC(XMM, dst) && inRC(XMM, src); 2806 simdPrefix(dst, Register.None, src, PS, P_0F, false); 2807 emitByte(0x53); 2808 emitModRM(dst, src); 2809 } 2810 2811 public final void ret(int imm16) { 2812 if (imm16 == 0) { 2813 emitByte(0xC3); 2814 } else { 2815 emitByte(0xC2); 2816 emitShort(imm16); 2817 } 2818 } 2819 2820 public final void sarl(Register dst, int imm8) { 2821 prefix(dst); 2822 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2823 if (imm8 == 1) { 2824 emitByte(0xD1); 2825 emitModRM(7, dst); 2826 } else { 2827 emitByte(0xC1); 2828 emitModRM(7, dst); 2829 emitByte(imm8); 2830 } 2831 } 2832 2833 public final void shll(Register dst, int imm8) { 2834 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2835 prefix(dst); 2836 if (imm8 == 1) { 2837 emitByte(0xD1); 2838 emitModRM(4, dst); 2839 } else { 2840 emitByte(0xC1); 2841 emitModRM(4, dst); 2842 emitByte(imm8); 2843 } 2844 } 2845 2846 public final void shll(Register dst) { 2847 // Multiply dst by 2, CL times. 2848 prefix(dst); 2849 emitByte(0xD3); 2850 emitModRM(4, dst); 2851 } 2852 2853 // Insn: SHLX r32a, r/m32, r32b 2854 2855 public final void shlxl(Register dst, Register src1, Register src2) { 2856 VexGeneralPurposeRMVOp.SHLX.emit(this, AVXSize.DWORD, dst, src1, src2); 2857 } 2858 2859 public final void shrl(Register dst, int imm8) { 2860 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2861 prefix(dst); 2862 emitByte(0xC1); 2863 emitModRM(5, dst); 2864 emitByte(imm8); 2865 } 2866 2867 public final void shrl(Register dst) { 2868 // Unsigned divide dst by 2, CL times. 
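// Encoding sketch: 0xD3 with the /5 ModRM extension below is SHR r/m32, CL,
// so for example shrl(AMD64.rax) assembles to D3 E8.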
2869 prefix(dst); 2870 emitByte(0xD3); 2871 emitModRM(5, dst); 2872 } 2873 2874 public final void subl(AMD64Address dst, int imm32) { 2875 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2876 } 2877 2878 public final void subl(Register dst, int imm32) { 2879 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2880 } 2881 2882 public final void subl(Register dst, Register src) { 2883 SUB.rmOp.emit(this, DWORD, dst, src); 2884 } 2885 2886 public final void subpd(Register dst, Register src) { 2887 SSEOp.SUB.emit(this, PD, dst, src); 2888 } 2889 2890 public final void subsd(Register dst, Register src) { 2891 SSEOp.SUB.emit(this, SD, dst, src); 2892 } 2893 2894 public final void subsd(Register dst, AMD64Address src) { 2895 SSEOp.SUB.emit(this, SD, dst, src); 2896 } 2897 2898 public final void testl(Register dst, int imm32) { 2899 // not using emitArith because test 2900 // doesn't support sign-extension of 2901 // 8bit operands 2902 if (dst.encoding == 0) { 2903 emitByte(0xA9); 2904 } else { 2905 prefix(dst); 2906 emitByte(0xF7); 2907 emitModRM(0, dst); 2908 } 2909 emitInt(imm32); 2910 } 2911 2912 public final void testl(Register dst, Register src) { 2913 prefix(dst, src); 2914 emitByte(0x85); 2915 emitModRM(dst, src); 2916 } 2917 2918 public final void testl(Register dst, AMD64Address src) { 2919 prefix(src, dst); 2920 emitByte(0x85); 2921 emitOperandHelper(dst, src, 0); 2922 } 2923 2924 public final void unpckhpd(Register dst, Register src) { 2925 assert inRC(XMM, dst) && inRC(XMM, src); 2926 simdPrefix(dst, dst, src, PD, P_0F, false); 2927 emitByte(0x15); 2928 emitModRM(dst, src); 2929 } 2930 2931 public final void unpcklpd(Register dst, Register src) { 2932 assert inRC(XMM, dst) && inRC(XMM, src); 2933 simdPrefix(dst, dst, src, PD, P_0F, false); 2934 emitByte(0x14); 2935 emitModRM(dst, src); 2936 } 2937 2938 public final void xorl(Register dst, Register src) { 2939 XOR.rmOp.emit(this, DWORD, dst, src); 2940 } 2941 2942 public final void xorq(Register dst, Register src) { 2943 XOR.rmOp.emit(this, QWORD, dst, src); 2944 } 2945 2946 public final void xorpd(Register dst, Register src) { 2947 SSEOp.XOR.emit(this, PD, dst, src); 2948 } 2949 2950 public final void xorps(Register dst, Register src) { 2951 SSEOp.XOR.emit(this, PS, dst, src); 2952 } 2953 2954 protected final void decl(Register dst) { 2955 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2956 prefix(dst); 2957 emitByte(0xFF); 2958 emitModRM(1, dst); 2959 } 2960 2961 protected final void incl(Register dst) { 2962 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 2963 prefix(dst); 2964 emitByte(0xFF); 2965 emitModRM(0, dst); 2966 } 2967 2968 public final void addq(Register dst, int imm32) { 2969 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2970 } 2971 2972 public final void addq(AMD64Address dst, int imm32) { 2973 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2974 } 2975 2976 public final void addq(Register dst, Register src) { 2977 ADD.rmOp.emit(this, QWORD, dst, src); 2978 } 2979 2980 public final void addq(AMD64Address dst, Register src) { 2981 ADD.mrOp.emit(this, QWORD, dst, src); 2982 } 2983 2984 public final void andq(Register dst, int imm32) { 2985 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2986 } 2987 2988 public final void bsrq(Register dst, Register src) { 2989 prefixq(dst, src); 2990 emitByte(0x0F); 2991 emitByte(0xBD); 2992 emitModRM(dst, src); 2993 } 2994 2995 public final void 
bswapq(Register reg) { 2996 prefixq(reg); 2997 emitByte(0x0F); 2998 emitByte(0xC8 + encode(reg)); 2999 } 3000 3001 public final void cdqq() { 3002 rexw(); 3003 emitByte(0x99); 3004 } 3005 3006 public final void cmovq(ConditionFlag cc, Register dst, Register src) { 3007 prefixq(dst, src); 3008 emitByte(0x0F); 3009 emitByte(0x40 | cc.getValue()); 3010 emitModRM(dst, src); 3011 } 3012 3013 public final void setb(ConditionFlag cc, Register dst) { 3014 prefix(dst, true); 3015 emitByte(0x0F); 3016 emitByte(0x90 | cc.getValue()); 3017 emitModRM(0, dst); 3018 } 3019 3020 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) { 3021 prefixq(src, dst); 3022 emitByte(0x0F); 3023 emitByte(0x40 | cc.getValue()); 3024 emitOperandHelper(dst, src, 0); 3025 } 3026 3027 public final void cmpq(Register dst, int imm32) { 3028 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3029 } 3030 3031 public final void cmpq(Register dst, Register src) { 3032 CMP.rmOp.emit(this, QWORD, dst, src); 3033 } 3034 3035 public final void cmpq(Register dst, AMD64Address src) { 3036 CMP.rmOp.emit(this, QWORD, dst, src); 3037 } 3038 3039 public final void cmpxchgq(Register reg, AMD64Address adr) { 3040 prefixq(adr, reg); 3041 emitByte(0x0F); 3042 emitByte(0xB1); 3043 emitOperandHelper(reg, adr, 0); 3044 } 3045 3046 public final void cvtdq2pd(Register dst, Register src) { 3047 assert inRC(XMM, dst) && inRC(XMM, src); 3048 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3049 emitByte(0xE6); 3050 emitModRM(dst, src); 3051 } 3052 3053 public final void cvtsi2sdq(Register dst, Register src) { 3054 SSEOp.CVTSI2SD.emit(this, QWORD, dst, src); 3055 } 3056 3057 public final void cvttsd2siq(Register dst, Register src) { 3058 SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src); 3059 } 3060 3061 public final void cvttpd2dq(Register dst, Register src) { 3062 assert inRC(XMM, dst) && inRC(XMM, src); 3063 simdPrefix(dst, Register.None, src, PD, P_0F, false); 3064 emitByte(0xE6); 3065 emitModRM(dst, src); 3066 } 3067 3068 public final void decq(Register dst) { 3069 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3070 prefixq(dst); 3071 emitByte(0xFF); 3072 emitModRM(1, dst); 3073 } 3074 3075 public final void decq(AMD64Address dst) { 3076 DEC.emit(this, QWORD, dst); 3077 } 3078 3079 public final void imulq(Register dst, Register src) { 3080 prefixq(dst, src); 3081 emitByte(0x0F); 3082 emitByte(0xAF); 3083 emitModRM(dst, src); 3084 } 3085 3086 public final void incq(Register dst) { 3087 // Don't use it directly. Use Macroincrementq() instead. 
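// For example, incq(AMD64.rbx) assembles to 48 FF C3 (REX.W prefix, then the FF /0 form below).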
3088 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3089 prefixq(dst); 3090 emitByte(0xFF); 3091 emitModRM(0, dst); 3092 } 3093 3094 public final void incq(AMD64Address dst) { 3095 INC.emit(this, QWORD, dst); 3096 } 3097 3098 public final void movq(Register dst, long imm64) { 3099 movq(dst, imm64, false); 3100 } 3101 3102 public final void movq(Register dst, long imm64, boolean annotateImm) { 3103 int insnPos = position(); 3104 prefixq(dst); 3105 emitByte(0xB8 + encode(dst)); 3106 int immPos = position(); 3107 emitLong(imm64); 3108 int nextInsnPos = position(); 3109 if (annotateImm && codePatchingAnnotationConsumer != null) { 3110 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 3111 } 3112 } 3113 3114 public final void movslq(Register dst, int imm32) { 3115 prefixq(dst); 3116 emitByte(0xC7); 3117 emitModRM(0, dst); 3118 emitInt(imm32); 3119 } 3120 3121 public final void movdq(Register dst, AMD64Address src) { 3122 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3123 } 3124 3125 public final void movdq(AMD64Address dst, Register src) { 3126 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3127 } 3128 3129 public final void movdq(Register dst, Register src) { 3130 if (inRC(XMM, dst) && inRC(CPU, src)) { 3131 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3132 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3133 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3134 } else { 3135 throw new InternalError("should not reach here"); 3136 } 3137 } 3138 3139 public final void movdl(Register dst, Register src) { 3140 if (inRC(XMM, dst) && inRC(CPU, src)) { 3141 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3142 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3143 AMD64MROp.MOVD.emit(this, DWORD, dst, src); 3144 } else { 3145 throw new InternalError("should not reach here"); 3146 } 3147 } 3148 3149 public final void movdl(Register dst, AMD64Address src) { 3150 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3151 } 3152 3153 public final void movddup(Register dst, Register src) { 3154 assert supports(CPUFeature.SSE3); 3155 assert inRC(XMM, dst) && inRC(XMM, src); 3156 simdPrefix(dst, Register.None, src, SD, P_0F, false); 3157 emitByte(0x12); 3158 emitModRM(dst, src); 3159 } 3160 3161 public final void movdqu(Register dst, AMD64Address src) { 3162 assert inRC(XMM, dst); 3163 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3164 emitByte(0x6F); 3165 emitOperandHelper(dst, src, 0); 3166 } 3167 3168 public final void movdqu(Register dst, Register src) { 3169 assert inRC(XMM, dst) && inRC(XMM, src); 3170 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3171 emitByte(0x6F); 3172 emitModRM(dst, src); 3173 } 3174 3175 // Insn: VMOVDQU xmm2/m128, xmm1 3176 3177 public final void movdqu(AMD64Address dst, Register src) { 3178 assert inRC(XMM, src); 3179 // Code: VEX.128.F3.0F.WIG 7F /r 3180 simdPrefix(src, Register.None, dst, SS, P_0F, false); 3181 emitByte(0x7F); 3182 emitOperandHelper(src, dst, 0); 3183 } 3184 3185 public final void movslq(AMD64Address dst, int imm32) { 3186 prefixq(dst); 3187 emitByte(0xC7); 3188 emitOperandHelper(0, dst, 4); 3189 emitInt(imm32); 3190 } 3191 3192 public final void movslq(Register dst, AMD64Address src) { 3193 prefixq(src, dst); 3194 emitByte(0x63); 3195 emitOperandHelper(dst, src, 0); 3196 } 3197 3198 public final void movslq(Register dst, Register src) { 3199 prefixq(dst, src); 3200 emitByte(0x63); 3201 emitModRM(dst, src); 3202 } 3203 3204 public final void negq(Register dst) { 3205 prefixq(dst); 
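// NEG r/m64 is REX.W F7 /3; emitModRM(3, dst) below supplies the /3 extension,
// so for example negq(AMD64.rax) assembles to 48 F7 D8.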
    public final void orq(Register dst, Register src) {
        OR.rmOp.emit(this, QWORD, dst, src);
    }

    public final void shlq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(4, dst);
        } else {
            emitByte(0xC1);
            emitModRM(4, dst);
            emitByte(imm8);
        }
    }

    public final void shlq(Register dst) {
        // Multiply dst by 2, CL times.
        prefixq(dst);
        emitByte(0xD3);
        emitModRM(4, dst);
    }

    public final void shrq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(5, dst);
        } else {
            emitByte(0xC1);
            emitModRM(5, dst);
            emitByte(imm8);
        }
    }

    public final void shrq(Register dst) {
        // Unsigned divide dst by 2, CL times.
        prefixq(dst);
        emitByte(0xD3);
        emitModRM(5, dst);
    }

    public final void sarq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(7, dst);
        } else {
            emitByte(0xC1);
            emitModRM(7, dst);
            emitByte(imm8);
        }
    }

    public final void sbbq(Register dst, Register src) {
        SBB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void subq(Register dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subq(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subqWide(Register dst, int imm32) {
        // Don't use the sign-extending version; force a 32-bit immediate.
        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
    }

    public final void subq(Register dst, Register src) {
        SUB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void testq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x85);
        emitModRM(dst, src);
    }

    public final void btrq(Register src, int imm8) {
        prefixq(src);
        emitByte(0x0F);
        emitByte(0xBA);
        emitModRM(6, src);
        emitByte(imm8);
    }

    public final void xaddb(AMD64Address dst, Register src) {
        prefixb(dst, src);
        emitByte(0x0F);
        emitByte(0xC0);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddw(AMD64Address dst, Register src) {
        emitByte(0x66); // Switch to 16-bit mode.
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }
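    // Illustrative sketch, not part of the original assembler: strength-reduced
    // scaling using the shift encodings above. shlq and sarq pick the short 0xD1
    // form for a shift by 1 and the 0xC1 imm8 form otherwise, so each emission
    // below is a single instruction.
    private void exampleStrengthReducedScaling(Register value) {
        shlq(value, 3); // value *= 8, via the 0xC1 /4 imm8 form
        sarq(value, 1); // value /= 2 (arithmetic shift), via the 0xD1 /7 form
    }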
    public final void xaddl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddq(AMD64Address dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xchgb(Register dst, AMD64Address src) {
        prefixb(src, dst);
        emitByte(0x86);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgw(Register dst, AMD64Address src) {
        emitByte(0x66);
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void membar(int barriers) {
        if (target.isMP) {
            // We only have to handle StoreLoad.
            if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions, which suffice as
                // barriers and are much faster than the alternative of using the
                // cpuid instruction. We emit a locked add of [rsp], 0 here, which is
                // conveniently a no-op except for clobbering the flags.
                // Any change to this code may need to revisit other places where
                // this idiom is used, in particular the orderAccess code.
                lock();
                addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here.
            }
        }
    }

    @Override
    protected final void patchJumpTarget(int branch, int branchTarget) {
        int op = getByte(branch);
        assert op == 0xE8 // call
                        || op == 0x00 // jump table entry
                        || op == 0xE9 // jmp
                        || op == 0xEB // short jmp
                        || (op & 0xF0) == 0x70 // short jcc
                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
                        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

        if (op == 0x00) {
            int offsetToJumpTableBase = getShort(branch + 1);
            int jumpTableBase = branch - offsetToJumpTableBase;
            int imm32 = branchTarget - jumpTableBase;
            emitInt(imm32, branch);
        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
            // Short offset operators (jmp and jcc).
            final int imm8 = branchTarget - (branch + 2);
            /*
             * Since a wrongly patched short branch can potentially lead to working but badly
             * behaving code, we always fail with an exception here instead of relying on an
             * assert.
             */
            GraalError.guarantee(isByte(imm8), "Displacement too large to be encoded as a byte: %d", imm8);
            emitByte(imm8, branch + 1);
        } else {
            int off = 1;
            if (op == 0x0F) {
                off = 2;
            }

            int imm32 = branchTarget - (branch + 4 + off);
            emitInt(imm32, branch + off);
        }
    }

    public void nullCheck(AMD64Address address) {
        testl(AMD64.rax, address);
    }

    @Override
    public void align(int modulus) {
        if (position() % modulus != 0) {
            nop(modulus - (position() % modulus));
        }
    }
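    // Illustrative sketch, not part of the original assembler: an atomic
    // fetch-and-add built from xaddq above. With the LOCK prefix the addition is
    // atomic, and XADD leaves the previous memory value in 'delta', which is what
    // distinguishes it from a plain locked add.
    private void exampleAtomicFetchAndAddQ(AMD64Address counter, Register delta) {
        lock();                // LOCK prefix for atomicity
        xaddq(counter, delta); // [counter] += delta; delta = old [counter]
    }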
    /**
     * Emits a direct call instruction. Note that the actual call target is not specified, because
     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
     * responsible for adding the call address to the appropriate patching tables.
     */
    public final void call() {
        annotatePatchingImmediate(1, 4);
        emitByte(0xE8);
        emitInt(0);
    }

    public final void call(Register src) {
        prefix(src);
        emitByte(0xFF);
        emitModRM(2, src);
    }

    public final void int3() {
        emitByte(0xCC);
    }

    public final void pause() {
        emitByte(0xF3);
        emitByte(0x90);
    }

    private void emitx87(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal stack offset";
        emitByte(b1);
        emitByte(b2 + i);
    }

    public final void fldd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(0, src, 0);
    }

    public final void flds(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(0, src, 0);
    }

    public final void fldln2() {
        emitByte(0xD9);
        emitByte(0xED);
    }

    public final void fldlg2() {
        emitByte(0xD9);
        emitByte(0xEC);
    }

    public final void fyl2x() {
        emitByte(0xD9);
        emitByte(0xF1);
    }

    public final void fstps(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(3, src, 0);
    }

    public final void fstpd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(3, src, 0);
    }

    private void emitFPUArith(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
        emitByte(b1);
        emitByte(b2 + i);
    }

    public void ffree(int i) {
        emitFPUArith(0xDD, 0xC0, i);
    }

    public void fincstp() {
        emitByte(0xD9);
        emitByte(0xF7);
    }

    public void fxch(int i) {
        emitFPUArith(0xD9, 0xC8, i);
    }

    public void fnstswAX() {
        emitByte(0xDF);
        emitByte(0xE0);
    }

    public void fwait() {
        emitByte(0x9B);
    }

    public void fprem() {
        emitByte(0xD9);
        emitByte(0xF8);
    }

    public final void fsin() {
        emitByte(0xD9);
        emitByte(0xFE);
    }

    public final void fcos() {
        emitByte(0xD9);
        emitByte(0xFF);
    }

    public final void fptan() {
        emitByte(0xD9);
        emitByte(0xF2);
    }

    public final void fstp(int i) {
        emitx87(0xDD, 0xD8, i);
    }

    @Override
    public AMD64Address makeAddress(Register base, int displacement) {
        return new AMD64Address(base, displacement);
    }

    @Override
    public AMD64Address getPlaceholder(int instructionStartPosition) {
        return new AMD64Address(AMD64.rip, Register.None, Scale.Times1, 0, instructionStartPosition);
    }

    private void prefetchPrefix(AMD64Address src) {
        prefix(src);
        emitByte(0x0F);
    }

    public void prefetchnta(AMD64Address src) {
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(0, src, 0);
    }

    void prefetchr(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(0, src, 0);
    }

    public void prefetcht0(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(1, src, 0);
    }

    public void prefetcht1(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(2, src, 0);
    }
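    // Illustrative sketch, not part of the original assembler: the classic x87
    // sequence for a natural logarithm built from the primitives above. FYL2X
    // computes ST1 * log2(ST0) and pops, so loading ln(2) first yields
    // ln(2) * log2(x) = ln(x).
    private void exampleLn(AMD64Address x, AMD64Address result) {
        fldln2();      // push ln(2)
        fldd(x);       // push x (ST0 = x, ST1 = ln(2))
        fyl2x();       // ST0 = ln(2) * log2(x) = ln(x)
        fstpd(result); // store the result and pop the x87 stack
    }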
    public void prefetcht2(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(3, src, 0);
    }

    public void prefetchw(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(1, src, 0);
    }

    public void rdtsc() {
        emitByte(0x0F);
        emitByte(0x31);
    }

    /**
     * Emits an instruction which is considered to be illegal. This is used when we deliberately
     * want to crash the program (e.g. for debugging).
     */
    public void illegal() {
        emitByte(0x0F);
        emitByte(0x0B);
    }

    public void lfence() {
        emitByte(0x0F);
        emitByte(0xAE);
        emitByte(0xE8);
    }

    public final void vptest(Register dst, Register src) {
        VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpxor(Register dst, Register nds, Register src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vpxor(Register dst, Register nds, AMD64Address src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vmovdqu(AMD64Address dst, Register src) {
        assert inRC(XMM, src);
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX2);
        VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vzeroupper() {
        emitVEX(L128, P_, M_0F, W0, 0, 0, true);
        emitByte(0x77);
    }

    // Insn: KORTESTD k1, k2

    // Sets ZF if the OR of the operands is all zeros, CF if it is all ones.
    public final void kortestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }

    // Insn: KORTESTQ k1, k2

    // Sets ZF if the OR of the operands is all zeros, CF if it is all ones.
    public final void kortestq(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }

    public final void kmovd(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovd(KRegister dst, KRegister src):
                // Insn: KMOVD k1, k2/m32
                // Code: VEX.L0.66.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovd(KRegister dst, Register src)
                // Insn: KMOVD k1, r32
                // Code: VEX.L0.F2.0F.W0 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovd(Register dst, KRegister src)
                // Insn: KMOVD r32, k1
                // Code: VEX.L0.F2.0F.W0 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }
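    // Illustrative sketch, not part of the original assembler: extracting an
    // opmask into a general-purpose register, as is typically done after a vector
    // compare so that ordinary bit-twiddling can locate the first matching lane.
    // The register classes are enforced by the asserts in kmovd above.
    private void exampleMaskToGpr(Register gpr, Register k) {
        kmovd(gpr, k); // KMOVD r32, k1 (the F2-prefixed 93 /r form)
    }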
    public final void kmovq(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovq(KRegister dst, KRegister src):
                // Insn: KMOVQ k1, k2/m64
                // Code: VEX.L0.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovq(KRegister dst, Register src)
                // Insn: KMOVQ k1, r64
                // Code: VEX.L0.F2.0F.W1 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovq(Register dst, KRegister src)
                // Insn: KMOVQ r64, k1
                // Code: VEX.L0.F2.0F.W1 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    // Insn: KTESTD k1, k2

    public final void ktestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 99 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x99);
        emitModRM(src1, src2);
    }

    public final void evmovdqu64(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512F);
        assert inRC(XMM, dst);
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1, m256

    public final void evpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(XMM, nds);
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
        emitByte(0x74);
        emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
    // -----
    // Insn: VMOVDQU16 zmm1, m512

    public final void evmovdqu16(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
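    // Illustrative sketch, not part of the original assembler: an AVX-512 byte
    // comparison whose per-element results land in an opmask register and are then
    // folded into flags for a branch. The register choices are the caller's; 'k'
    // must be an opmask (MASK) register, as enforced by the asserts above.
    private void exampleVectorByteCompare(Register k, Register vector, AMD64Address memory) {
        evpcmpeqb(k, vector, memory); // k[i] = (vector byte i == memory byte i)
        kortestd(k, k);               // ZF = (k == 0), CF = (k == all ones)
    }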
    // Insn: VMOVDQU16 zmm1 {k1}{z}, m512

    public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(MASK, mask);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
    // -----
    // Insn: VMOVDQU16 m512, zmm1

    public final void evmovdqu16(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 m512 {k1}, zmm1

    public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
    // -----
    // Insn: VPBROADCASTW zmm1, reg

    public final void evpbroadcastw(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(CPU, src);
        // Code: EVEX.512.66.0F38.W0 7B /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
        emitByte(0x7B);
        emitModRM(dst, src);
    }

    // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
    // -----
    // Insn: VPCMPUW k1, zmm2, zmm3, imm8

    public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src);
        // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
        emitByte(0x3E);
        emitModRM(kdst, src);
        emitByte(vcc);
    }

    // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
    // -----
    // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8

    public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(MASK, mask);
        assert inRC(XMM, nds) && inRC(XMM, src);
        // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
        evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
        emitByte(0x3E);
        emitModRM(kdst, src);
        emitByte(vcc);
    }

    // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
    // -----
    // Insn: VPMOVWB m256, zmm2

    public final void evpmovwb(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
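    // Illustrative sketch, not part of the original assembler: a masked AVX-512
    // load such as one would use for a loop tail. The caller is assumed to have
    // materialized the tail mask in 'mask' beforehand (e.g. via kmovq); with the
    // zeroing form used by the masked evmovdqu16 above, lanes outside the mask
    // read as zero instead of leaving stale data in 'dst'.
    private void exampleMaskedTailLoad(Register dst, Register mask, AMD64Address tail) {
        evmovdqu16(dst, mask, tail); // dst {mask}{z} = 512-bit word-granular load
    }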
    // Insn: VPMOVWB m256 {k1}, zmm2

    public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
    // -----
    // Insn: VPMOVZXBW zmm1 {k1}, m256

    public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
}