/*
 * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends AMD64BaseAssembler {

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }
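    // Usage sketch (illustrative only, not part of this file's API surface): the 4-bit
    // condition value is what Jcc-style emitters combine with a base opcode (e.g.
    // 0x70 | getValue() for a short Jcc), and negate() is the usual way to branch over a
    // skipped block; assumes an AMD64Assembler `asm` and a Label `skip` are in scope:
    //
    //   ConditionFlag cond = ConditionFlag.Less;
    //   asm.jcc(cond.negate(), skip);   // jump over the block when the "less" test fails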
    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
        WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
        WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
        QwordAssertion(CPU, CPU, QWORD),
        FloatAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
        FloatToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }
    }

    protected static final int P_0F = 0x0F;
    protected static final int P_0F38 = 0x380F;
    protected static final int P_0F3A = 0x3A0F;

    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.getSizePrefix() != 0) {
                asm.emitByte(size.getSizePrefix());
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        public OperandSize[] getAllowedSizes() {
            return assertion.allowedSizes;
        }

        protected final boolean isSSEInstruction() {
            if (feature == null) {
                return false;
            }
            switch (feature) {
                case SSE:
                case SSE2:
                case SSE3:
                case SSSE3:
                case SSE4A:
                case SSE4_1:
                case SSE4_2:
                    return true;
                default:
                    return false;
            }
        }

        public final OpAssertion getAssertion() {
            return assertion;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }
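    // Worked example (illustrative only): emitOpcode for "mov r9, rax" via AMD64MROp.MOV
    // (opcode 0x89, QWORD). The rxb argument contributes REX.B for the r9 rm-operand and
    // the QWORD size sets REX.W, so the emitted bytes are:
    //
    //   0x49 0x89 0xC1   (REX.WB, opcode 0x89, ModRM mod=11 reg=rax rm=r9)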
    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            this(opcode, immIsByte, prefix, op, assertion, null);
        }

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, 0, prefix, op, assertion, feature);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.getBytes();
            }
        }
    }

    /**
     * Opcode with operand order of either RM or MR for two-address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
        public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
        public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
        // POPCNT, TZCNT, and LZCNT support word operation. However, the legacy size prefix would
        // have to be emitted before the mandatory prefix 0xF3. Since we never emit bit counts for
        // 16-bit operands, we simply use DwordOrLargerAssertion here.
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, OpAssertion.DwordOrLargerAssertion, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, OpAssertion.DwordOrLargerAssertion, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, OpAssertion.DwordOrLargerAssertion, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordAssertion);
        public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);
        public static final AMD64RMOp CMP = new AMD64RMOp("CMP", 0x3B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with a different operand size prefix.
        public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);

        // TEST is documented as an MR operation, but it is symmetric, and using it as an RM operation is more convenient.
        public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x10:
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
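    // Usage sketch (illustrative only): RM operations read their second operand from a
    // register or from memory, e.g. a zero-extending byte load into eax, assuming an
    // assembler `asm` is in scope:
    //
    //   AMD64RMOp.MOVZXB.emit(asm, DWORD, AMD64.rax, new AMD64Address(AMD64.rsp, 16));
    //   // movzx eax, byte ptr [rsp + 16]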
    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);

        // MOVD and MOVQ are the same opcode, just with a different operand size prefix.
        // Note that as MR opcodes, their operand order is reversed, so the IntToFloatAssertion must be used.
        public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with a different operand size prefix.
        public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x11:
                        if ((size == SS) || (size == SD)) {
                            nds = src;
                        }
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(src, nds, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(src, dst);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, src, null);
            if (isSSEInstruction()) {
                asm.simdPrefix(src, Register.None, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
            }
            asm.emitOperandHelper(src, dst, 0);
        }
    }

    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
        public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
        public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
        public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP = new AMD64MOp("POP", 0x8F, 0, OpAssertion.WordOrDwordAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }
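    // Usage sketch (illustrative only): M operations encode a single operand plus an
    // opcode extension in the ModRM reg field:
    //
    //   AMD64MOp.NEG.emit(asm, DWORD, AMD64.rax);   // neg eax  (0xF7 /3)
    //   AMD64MOp.INC.emit(asm, QWORD, AMD64.rbx);   // inc rbx  (REX.W 0xFF /0)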
    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm, boolean annotateImm) {
            assert verify(asm, size, dst, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm, boolean annotateImm) {
            assert verify(asm, size, null, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }
    }
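    // Usage sketch (illustrative only): MI operations pair a register or memory operand
    // with an immediate. Passing annotateImm = true reports the immediate's position to
    // the codePatchingAnnotationConsumer (when one is registered) so a later phase can
    // patch the encoded constant in place:
    //
    //   AMD64MIOp.MOV.emit(asm, DWORD, new AMD64Address(AMD64.rsp, 8), 0xDEAD);  // mov dword ptr [rsp + 8], 0xdead
    //   AMD64MIOp.TEST.emit(asm, DWORD, AMD64.rax, 0xFF, true);                  // test eax, 0xff (immediate annotated)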
    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of round, as the operation is always treated as having a single
     * variant input, which would make an extension to 3-address forms redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion, null);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, immIsByte, prefix, op, assertion, feature);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
            }
            asm.emitOperandHelper(dst, src, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }

    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
        public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
        public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
        public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
        public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
        public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
        public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
        public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }

    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }
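    // Usage sketch (illustrative only): selecting the immediate form of ADD. When the
    // immediate fits in a byte, the sign-extended 0x83 encoding saves three bytes over
    // the plain imm32 0x81 form:
    //
    //   int imm = 42;
    //   AMD64MIOp op = ADD.getMIOpcode(DWORD, NumUtil.isByte(imm));
    //   op.emit(asm, DWORD, AMD64.rax, imm);   // add eax, 42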
    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }
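    // Usage sketch (illustrative only): the MI form shifts by an immediate, the MC form
    // by the count in CL:
    //
    //   AMD64Shift.SHL.miOp.emit(asm, DWORD, AMD64.rax, 3);   // shl eax, 3   (0xC1 /4 ib)
    //   AMD64Shift.SHL.mcOp.emit(asm, DWORD, AMD64.rax);      // shl eax, cl  (0xD3 /4)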
    private enum VEXOpAssertion {
        AVX1(CPUFeature.AVX, CPUFeature.AVX),
        AVX1_2(CPUFeature.AVX, CPUFeature.AVX2),
        AVX2(CPUFeature.AVX2, CPUFeature.AVX2),
        AVX1_128ONLY(CPUFeature.AVX, null),
        AVX1_256ONLY(null, CPUFeature.AVX),
        AVX2_256ONLY(null, CPUFeature.AVX2),
        XMM_CPU(CPUFeature.AVX, null, XMM, null, CPU, null),
        XMM_XMM_CPU(CPUFeature.AVX, null, XMM, XMM, CPU, null),
        CPU_XMM(CPUFeature.AVX, null, CPU, null, XMM, null),
        AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, CPU, null, XMM, null),
        BMI1(CPUFeature.BMI1, null, CPU, CPU, CPU, null),
        BMI2(CPUFeature.BMI2, null, CPU, CPU, CPU, null);

        private final CPUFeature l128feature;
        private final CPUFeature l256feature;

        private final RegisterCategory rCategory;
        private final RegisterCategory vCategory;
        private final RegisterCategory mCategory;
        private final RegisterCategory imm8Category;

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature) {
            this(l128feature, l256feature, XMM, XMM, XMM, XMM);
        }

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory, RegisterCategory imm8Category) {
            this.l128feature = l128feature;
            this.l256feature = l256feature;
            this.rCategory = rCategory;
            this.vCategory = vCategory;
            this.mCategory = mCategory;
            this.imm8Category = imm8Category;
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
            return check(arch, getLFlag(size), r, v, m, null);
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
            return check(arch, getLFlag(size), r, v, m, imm8);
        }

        public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
            switch (l) {
                case L128:
                    assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
                    break;
                case L256:
                    assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";
                    break;
            }
            if (r != null) {
                assert r.getRegisterCategory().equals(rCategory);
            }
            if (v != null) {
                assert v.getRegisterCategory().equals(vCategory);
            }
            if (m != null) {
                assert m.getRegisterCategory().equals(mCategory);
            }
            if (imm8 != null) {
                assert imm8.getRegisterCategory().equals(imm8Category);
            }
            return true;
        }

        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
            switch (avxSize) {
                case XMM:
                    return l128feature != null && features.contains(l128feature);
                case YMM:
                    return l256feature != null && features.contains(l256feature);
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }
    }

    /**
     * Base class for VEX-encoded instructions.
     */
    public static class VexOp {
        protected final int pp;
        protected final int mmmmm;
        protected final int w;
        protected final int op;

        private final String opcode;
        protected final VEXOpAssertion assertion;

        protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            this.pp = pp;
            this.mmmmm = mmmmm;
            this.w = w;
            this.op = op;
            this.opcode = opcode;
            this.assertion = assertion;
        }

        public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
            return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size);
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
     */
    public static class VexRROp extends VexOp {
        // @formatter:off
        public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
        // @formatter:on

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            // 0x1A (VBROADCASTF128) and 0x5A (VBROADCASTI128) in the 0F38 map only accept memory sources.
            assert mmmmm != M_0F38 || (op != 0x1A && op != 0x5A) : "instruction requires a memory operand";
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
        }
    }
    /**
     * VEX-encoded instructions with an operand order of RM.
     */
    public static class VexRMOp extends VexRROp {
        // @formatter:off
        public static final VexRMOp VCVTTSS2SI = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSS2SQ = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SI = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SQ = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTPS2PD = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTPD2PS = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTDQ2PS = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPS2DQ = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPD2DQ = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
        public static final VexRMOp VCVTDQ2PD = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
        public static final VexRMOp VBROADCASTSS = new VexRMOp("VBROADCASTSS", P_66, M_0F38, W0, 0x18);
        public static final VexRMOp VBROADCASTSD = new VexRMOp("VBROADCASTSD", P_66, M_0F38, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VBROADCASTF128 = new VexRMOp("VBROADCASTF128", P_66, M_0F38, W0, 0x1A, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VBROADCASTI128 = new VexRMOp("VBROADCASTI128", P_66, M_0F38, W0, 0x5A, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMOp VPBROADCASTB = new VexRMOp("VPBROADCASTB", P_66, M_0F38, W0, 0x78, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTW = new VexRMOp("VPBROADCASTW", P_66, M_0F38, W0, 0x79, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTD = new VexRMOp("VPBROADCASTD", P_66, M_0F38, W0, 0x58, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTQ = new VexRMOp("VPBROADCASTQ", P_66, M_0F38, W0, 0x59, VEXOpAssertion.AVX2);
        public static final VexRMOp VPMOVMSKB = new VexRMOp("VPMOVMSKB", P_66, M_0F, WIG, 0xD7, VEXOpAssertion.AVX1_2_CPU_XMM);
        public static final VexRMOp VPMOVSXBW = new VexRMOp("VPMOVSXBW", P_66, M_0F38, WIG, 0x20);
        public static final VexRMOp VPMOVSXBD = new VexRMOp("VPMOVSXBD", P_66, M_0F38, WIG, 0x21);
        public static final VexRMOp VPMOVSXBQ = new VexRMOp("VPMOVSXBQ", P_66, M_0F38, WIG, 0x22);
        public static final VexRMOp VPMOVSXWD = new VexRMOp("VPMOVSXWD", P_66, M_0F38, WIG, 0x23);
        public static final VexRMOp VPMOVSXWQ = new VexRMOp("VPMOVSXWQ", P_66, M_0F38, WIG, 0x24);
        public static final VexRMOp VPMOVSXDQ = new VexRMOp("VPMOVSXDQ", P_66, M_0F38, WIG, 0x25);
        public static final VexRMOp VPMOVZXBW = new VexRMOp("VPMOVZXBW", P_66, M_0F38, WIG, 0x30);
        public static final VexRMOp VPMOVZXBD = new VexRMOp("VPMOVZXBD", P_66, M_0F38, WIG, 0x31);
        public static final VexRMOp VPMOVZXBQ = new VexRMOp("VPMOVZXBQ", P_66, M_0F38, WIG, 0x32);
        public static final VexRMOp VPMOVZXWD = new VexRMOp("VPMOVZXWD", P_66, M_0F38, WIG, 0x33);
        public static final VexRMOp VPMOVZXWQ = new VexRMOp("VPMOVZXWQ", P_66, M_0F38, WIG, 0x34);
        public static final VexRMOp VPMOVZXDQ = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
        public static final VexRMOp VPTEST = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
        public static final VexRMOp VSQRTPD = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTPS = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSD = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSS = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
        public static final VexRMOp VUCOMISS = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
        public static final VexRMOp VUCOMISD = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
        // @formatter:on

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }

    /**
     * VEX-encoded move instructions.
     * <p>
     * These instructions have two opcodes: {@code op} is the forward move instruction with an
     * operand order of RM, and {@code opReverse} is the reverse move instruction with an operand
     * order of MR.
     */
    public static final class VexMoveOp extends VexRMOp {
        // @formatter:off
        public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSS = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSD = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVD = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        public static final VexMoveOp VMOVQ = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private final int opReverse;

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }

        public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitModRM(src, dst);
        }
    }
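    // Usage sketch (illustrative only): a 256-bit unaligned load and store through VMOVDQU,
    // assuming AVX is available and an AMD64Address `addr` is in scope:
    //
    //   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, AMD64.xmm0, addr);   // vmovdqu ymm0, [addr]  (RM form)
    //   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, addr, AMD64.xmm0);   // vmovdqu [addr], ymm0  (MR form)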
    public interface VexRRIOp {
        void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
    }

    /**
     * VEX-encoded instructions with an operand order of RMI.
     */
    public static final class VexRMIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexRMIOp VPERMQ = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFD = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 1);
            asm.emitByte(imm8);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of MRI.
     */
    public static final class VexMRIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
        public static final VexMRIOp VPEXTRB = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRW = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRD = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRQ = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(src, dst);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(src, dst, 1);
            asm.emitByte(imm8);
        }
    }
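    // Usage sketch (illustrative only): extracting the upper 128-bit lane of a YMM
    // register, assuming AVX2 is available:
    //
    //   VexMRIOp.VEXTRACTI128.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, 1);
    //   // vextracti128 xmm0, ymm1, 1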
    /**
     * VEX-encoded instructions with an operand order of RVMR.
     */
    public static class VexRVMROp extends VexOp {
        // @formatter:off
        public static final VexRVMROp VPBLENDVB = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
        public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
        public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
        // @formatter:on

        protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(mask.encoding() << 4);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
            asm.emitByte(mask.encoding() << 4);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RVM.
     */
    public static class VexRVMOp extends VexOp {
        // @formatter:off
        public static final VexRVMOp VANDPS = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDPD = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDNPS = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
        public static final VexRVMOp VANDNPD = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
        public static final VexRVMOp VORPS = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
        public static final VexRVMOp VORPD = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
        public static final VexRVMOp VXORPS = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
        public static final VexRVMOp VXORPD = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
        public static final VexRVMOp VADDPS = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDPD = new VexRVMOp("VADDPD", P_66, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSS = new VexRVMOp("VADDSS", P_F3, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSD = new VexRVMOp("VADDSD", P_F2, M_0F, WIG, 0x58);
        public static final VexRVMOp VMULPS = new VexRVMOp("VMULPS", P_, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULPD = new VexRVMOp("VMULPD", P_66, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSS = new VexRVMOp("VMULSS", P_F3, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSD = new VexRVMOp("VMULSD", P_F2, M_0F, WIG, 0x59);
        public static final VexRVMOp VSUBPS = new VexRVMOp("VSUBPS", P_, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBPD = new VexRVMOp("VSUBPD", P_66, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSS = new VexRVMOp("VSUBSS", P_F3, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSD = new VexRVMOp("VSUBSD", P_F2, M_0F, WIG, 0x5C);
        public static final VexRVMOp VMINPS = new VexRVMOp("VMINPS", P_, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINPD = new VexRVMOp("VMINPD", P_66, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSS = new VexRVMOp("VMINSS", P_F3, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSD = new VexRVMOp("VMINSD", P_F2, M_0F, WIG, 0x5D);
        public static final VexRVMOp VDIVPS = new VexRVMOp("VDIVPS", P_, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVPD = new VexRVMOp("VDIVPD", P_66, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSS = new VexRVMOp("VDIVSS", P_F3, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSD = new VexRVMOp("VDIVSD", P_F2, M_0F, WIG, 0x5E);
        public static final VexRVMOp VMAXPS = new VexRVMOp("VMAXPS", P_, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXPD = new VexRVMOp("VMAXPD", P_66, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSS = new VexRVMOp("VMAXSS", P_F3, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSD = new VexRVMOp("VMAXSD", P_F2, M_0F, WIG, 0x5F);
        public static final VexRVMOp VADDSUBPS = new VexRVMOp("VADDSUBPS", P_F2, M_0F, WIG, 0xD0);
        public static final VexRVMOp VADDSUBPD = new VexRVMOp("VADDSUBPD", P_66, M_0F, WIG, 0xD0);
        public static final VexRVMOp VPAND = new VexRVMOp("VPAND", P_66, M_0F, WIG, 0xDB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPOR = new VexRVMOp("VPOR", P_66, M_0F, WIG, 0xEB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPXOR = new VexRVMOp("VPXOR", P_66, M_0F, WIG, 0xEF, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDB = new VexRVMOp("VPADDB", P_66, M_0F, WIG, 0xFC, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDW = new VexRVMOp("VPADDW", P_66, M_0F, WIG, 0xFD, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDD = new VexRVMOp("VPADDD", P_66, M_0F, WIG, 0xFE, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDQ = new VexRVMOp("VPADDQ", P_66, M_0F, WIG, 0xD4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHUW = new VexRVMOp("VPMULHUW", P_66, M_0F, WIG, 0xE4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHW = new VexRVMOp("VPMULHW", P_66, M_0F, WIG, 0xE5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLW = new VexRVMOp("VPMULLW", P_66, M_0F, WIG, 0xD5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLD = new VexRVMOp("VPMULLD", P_66, M_0F38, WIG, 0x40, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBB = new VexRVMOp("VPSUBB", P_66, M_0F, WIG, 0xF8, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBW = new VexRVMOp("VPSUBW", P_66, M_0F, WIG, 0xF9, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBD = new VexRVMOp("VPSUBD", P_66, M_0F, WIG, 0xFA, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBQ = new VexRVMOp("VPSUBQ", P_66, M_0F, WIG, 0xFB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSHUFB = new VexRVMOp("VPSHUFB", P_66, M_0F38, WIG, 0x00, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VCVTSD2SS = new VexRVMOp("VCVTSD2SS", P_F2, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSS2SD = new VexRVMOp("VCVTSS2SD", P_F3, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSI2SD = new VexRVMOp("VCVTSI2SD", P_F2, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SD = new VexRVMOp("VCVTSQ2SD", P_F2, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSI2SS = new VexRVMOp("VCVTSI2SS", P_F3, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SS = new VexRVMOp("VCVTSQ2SS", P_F3, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
VEXOpAssertion.XMM_XMM_CPU); 1304 public static final VexRVMOp VPCMPEQB = new VexRVMOp("VPCMPEQB", P_66, M_0F, WIG, 0x74, VEXOpAssertion.AVX1_2); 1305 public static final VexRVMOp VPCMPEQW = new VexRVMOp("VPCMPEQW", P_66, M_0F, WIG, 0x75, VEXOpAssertion.AVX1_2); 1306 public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2); 1307 public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2); 1308 public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2); 1309 public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2); 1310 public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2); 1311 public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2); 1312 // @formatter:on 1313 1314 private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) { 1315 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1316 } 1317 1318 protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1319 super(opcode, pp, mmmmm, w, op, assertion); 1320 } 1321 1322 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1323 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1324 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1325 asm.emitByte(op); 1326 asm.emitModRM(dst, src2); 1327 } 1328 1329 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1330 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1331 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1332 asm.emitByte(op); 1333 asm.emitOperandHelper(dst, src2, 0); 1334 } 1335 } 1336 1337 public static final class VexGeneralPurposeRVMOp extends VexRVMOp { 1338 // @formatter:off 1339 public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1); 1340 public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2); 1341 public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1342 public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1343 // @formatter:on 1344 1345 private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1346 super(opcode, pp, mmmmm, w, op, assertion); 1347 } 1348 1349 @Override 1350 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1351 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null); 1352 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1353 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1354 asm.emitByte(op); 1355 asm.emitModRM(dst, src2); 1356 } 1357 1358 @Override 1359 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1360 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null); 1361 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1362 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? 
W0 : W1, false); 1363 asm.emitByte(op); 1364 asm.emitOperandHelper(dst, src2, 0); 1365 } 1366 } 1367 1368 public static final class VexGeneralPurposeRMVOp extends VexOp { 1369 // @formatter:off 1370 public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1); 1371 public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1372 public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1373 public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1374 public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1375 // @formatter:on 1376 1377 private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1378 super(opcode, pp, mmmmm, w, op, assertion); 1379 } 1380 1381 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1382 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null); 1383 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1384 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1385 asm.emitByte(op); 1386 asm.emitModRM(dst, src1); 1387 } 1388 1389 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) { 1390 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null); 1391 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1392 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1393 asm.emitByte(op); 1394 asm.emitOperandHelper(dst, src1, 0); 1395 } 1396 } 1397 1398 public static final class VexGeneralPurposeRMOp extends VexRMOp { 1399 // @formatter:off 1400 public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1); 1401 public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1); 1402 public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1); 1403 // @formatter:on 1404 private final int ext; 1405 1406 private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) { 1407 super(opcode, pp, mmmmm, w, op, assertion); 1408 this.ext = ext; 1409 } 1410 1411 @Override 1412 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) { 1413 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1414 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1415 asm.emitByte(op); 1416 asm.emitModRM(ext, src); 1417 } 1418 1419 @Override 1420 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) { 1421 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1422 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1423 asm.emitByte(op); 1424 asm.emitOperandHelper(ext, src, 0); 1425 } 1426 } 1427 1428 /** 1429 * VEX-encoded shift instructions with an operand order of either RVM or VMI. 
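*
* <p>
* A usage sketch (illustrative only, not from the original source; assumes an assembler
* instance {@code asm} and picks arbitrary XMM registers from {@code jdk.vm.ci.amd64.AMD64}):
*
* <pre>
* // RVM form: xmm0 := xmm1 shifted left per 32-bit lane by the count in xmm2
* VexShiftOp.VPSLLD.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2);
* // VMI form: xmm0 := xmm1 shifted left per 32-bit lane by the immediate 4
* VexShiftOp.VPSLLD.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, 4);
* </pre>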
1430 */ 1431 public static final class VexShiftOp extends VexRVMOp implements VexRRIOp { 1432 // @formatter:off 1433 public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2); 1434 public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2); 1435 public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2); 1436 public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4); 1437 public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4); 1438 public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6); 1439 public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6); 1440 public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6); 1441 // @formatter:on 1442 1443 private final int immOp; 1444 private final int r; 1445 1446 private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) { 1447 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2); 1448 this.immOp = immOp; 1449 this.r = r; 1450 } 1451 1452 @Override 1453 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { 1454 assert assertion.check((AMD64) asm.target.arch, size, null, dst, src); 1455 asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false); 1456 asm.emitByte(immOp); 1457 asm.emitModRM(r, src); 1458 asm.emitByte(imm8); 1459 } 1460 } 1461 1462 public static final class VexMaskMoveOp extends VexOp { 1463 // @formatter:off 1464 public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E); 1465 public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F); 1466 public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1467 public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1468 // @formatter:on 1469 1470 private final int opReverse; 1471 1472 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) { 1473 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1); 1474 } 1475 1476 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) { 1477 super(opcode, pp, mmmmm, w, op, assertion); 1478 this.opReverse = opReverse; 1479 } 1480 1481 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) { 1482 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null); 1483 asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false); 1484 asm.emitByte(op); 1485 asm.emitOperandHelper(dst, src, 0); 1486 } 1487 1488 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) { 1489 assert assertion.check((AMD64) asm.target.arch, size, src, mask, null); 1490 asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false); 1491 asm.emitByte(opReverse); 1492 asm.emitOperandHelper(src, dst, 0); 1493 } 1494 } 1495 1496 /** 1497 * VEX-encoded instructions with an operand order of RVMI. 
1498 */ 1499 public static final class VexRVMIOp extends VexOp { 1500 // @formatter:off 1501 public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6); 1502 public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6); 1503 public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY); 1504 public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY); 1505 // @formatter:on 1506 1507 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) { 1508 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1509 } 1510 1511 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1512 super(opcode, pp, mmmmm, w, op, assertion); 1513 } 1514 1515 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) { 1516 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1517 assert (imm8 & 0xFF) == imm8; 1518 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1519 asm.emitByte(op); 1520 asm.emitModRM(dst, src2); 1521 asm.emitByte(imm8); 1522 } 1523 1524 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) { 1525 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1526 assert (imm8 & 0xFF) == imm8; 1527 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1528 asm.emitByte(op); 1529 asm.emitOperandHelper(dst, src2, 1); 1530 asm.emitByte(imm8); 1531 } 1532 } 1533 1534 /** 1535 * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a 1536 * comparison operator. 
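*
* <p>
* For example (an illustrative sketch, not from the original source; assumes an assembler
* instance {@code asm} and arbitrary registers):
*
* <pre>
* // per double lane: xmm0 := all-ones where xmm1 &lt; xmm2 (ordered, non-signaling), else zero
* VexFloatCompareOp.VCMPPD.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2,
*                 VexFloatCompareOp.Predicate.LT_OQ);
* </pre>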
1537 */ 1538 public static final class VexFloatCompareOp extends VexOp { 1539 // @formatter:off 1540 public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2); 1541 public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2); 1542 public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2); 1543 public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2); 1544 // @formatter:on 1545 1546 public enum Predicate { 1547 EQ_OQ(0x00), 1548 LT_OS(0x01), 1549 LE_OS(0x02), 1550 UNORD_Q(0x03), 1551 NEQ_UQ(0x04), 1552 NLT_US(0x05), 1553 NLE_US(0x06), 1554 ORD_Q(0x07), 1555 EQ_UQ(0x08), 1556 NGE_US(0x09), 1557 NGT_US(0x0a), 1558 FALSE_OQ(0x0b), 1559 NEQ_OQ(0x0c), 1560 GE_OS(0x0d), 1561 GT_OS(0x0e), 1562 TRUE_UQ(0x0f), 1563 EQ_OS(0x10), 1564 LT_OQ(0x11), 1565 LE_OQ(0x12), 1566 UNORD_S(0x13), 1567 NEQ_US(0x14), 1568 NLT_UQ(0x15), 1569 NLE_UQ(0x16), 1570 ORD_S(0x17), 1571 EQ_US(0x18), 1572 NGE_UQ(0x19), 1573 NGT_UQ(0x1a), 1574 FALSE_OS(0x1b), 1575 NEQ_OS(0x1c), 1576 GE_OQ(0x1d), 1577 GT_OQ(0x1e), 1578 TRUE_US(0x1f); 1579 1580 private int imm8; 1581 1582 Predicate(int imm8) { 1583 this.imm8 = imm8; 1584 } 1585 1586 public static Predicate getPredicate(Condition condition, boolean unorderedIsTrue) { 1587 if (unorderedIsTrue) { 1588 switch (condition) { 1589 case EQ: 1590 return EQ_UQ; 1591 case NE: 1592 return NEQ_UQ; 1593 case LT: 1594 return NGE_UQ; 1595 case LE: 1596 return NGT_UQ; 1597 case GT: 1598 return NLE_UQ; 1599 case GE: 1600 return NLT_UQ; 1601 default: 1602 throw GraalError.shouldNotReachHere(); 1603 } 1604 } else { 1605 switch (condition) { 1606 case EQ: 1607 return EQ_OQ; 1608 case NE: 1609 return NEQ_OQ; 1610 case LT: 1611 return LT_OQ; 1612 case LE: 1613 return LE_OQ; 1614 case GT: 1615 return GT_OQ; 1616 case GE: 1617 return GE_OQ; 1618 default: 1619 throw GraalError.shouldNotReachHere(); 1620 } 1621 } 1622 } 1623 } 1624 1625 private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) { 1626 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1627 } 1628 1629 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) { 1630 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1631 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1632 asm.emitByte(op); 1633 asm.emitModRM(dst, src2); 1634 asm.emitByte(p.imm8); 1635 } 1636 1637 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) { 1638 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1639 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1640 asm.emitByte(op); 1641 asm.emitOperandHelper(dst, src2, 1); 1642 asm.emitByte(p.imm8); 1643 } 1644 } 1645 1646 public final void addl(AMD64Address dst, int imm32) { 1647 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1648 } 1649 1650 public final void addl(Register dst, int imm32) { 1651 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1652 } 1653 1654 public final void addl(Register dst, Register src) { 1655 ADD.rmOp.emit(this, DWORD, dst, src); 1656 } 1657 1658 public final void addpd(Register dst, Register src) { 1659 SSEOp.ADD.emit(this, PD, dst, src); 1660 } 1661 1662 public final void addpd(Register dst, AMD64Address src) { 1663 SSEOp.ADD.emit(this, PD, dst, src); 1664 } 1665 1666 public final void
addsd(Register dst, Register src) { 1667 SSEOp.ADD.emit(this, SD, dst, src); 1668 } 1669 1670 public final void addsd(Register dst, AMD64Address src) { 1671 SSEOp.ADD.emit(this, SD, dst, src); 1672 } 1673 1674 private void addrNop4() { 1675 // 4 bytes: NOP DWORD PTR [EAX+0] 1676 emitByte(0x0F); 1677 emitByte(0x1F); 1678 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc); 1679 emitByte(0); // 8-bit offset (1 byte) 1680 } 1681 1682 private void addrNop5() { 1683 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset 1684 emitByte(0x0F); 1685 emitByte(0x1F); 1686 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); 1687 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1688 emitByte(0); // 8-bit offset (1 byte) 1689 } 1690 1691 private void addrNop7() { 1692 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset 1693 emitByte(0x0F); 1694 emitByte(0x1F); 1695 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); 1696 emitInt(0); // 32-bit offset (4 bytes) 1697 } 1698 1699 private void addrNop8() { 1700 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset 1701 emitByte(0x0F); 1702 emitByte(0x1F); 1703 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); 1704 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1705 emitInt(0); // 32-bit offset (4 bytes) 1706 } 1707 1708 public final void andl(Register dst, int imm32) { 1709 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1710 } 1711 1712 public final void andl(Register dst, Register src) { 1713 AND.rmOp.emit(this, DWORD, dst, src); 1714 } 1715 1716 public final void andpd(Register dst, Register src) { 1717 SSEOp.AND.emit(this, PD, dst, src); 1718 } 1719 1720 public final void andpd(Register dst, AMD64Address src) { 1721 SSEOp.AND.emit(this, PD, dst, src); 1722 } 1723 1724 public final void bsfq(Register dst, Register src) { 1725 prefixq(dst, src); 1726 emitByte(0x0F); 1727 emitByte(0xBC); 1728 emitModRM(dst, src); 1729 } 1730 1731 public final void bsrl(Register dst, Register src) { 1732 prefix(dst, src); 1733 emitByte(0x0F); 1734 emitByte(0xBD); 1735 emitModRM(dst, src); 1736 } 1737 1738 public final void bswapl(Register reg) { 1739 prefix(reg); 1740 emitByte(0x0F); 1741 emitModRM(1, reg); 1742 } 1743 1744 public final void cdql() { 1745 emitByte(0x99); 1746 } 1747 1748 public final void cmovl(ConditionFlag cc, Register dst, Register src) { 1749 prefix(dst, src); 1750 emitByte(0x0F); 1751 emitByte(0x40 | cc.getValue()); 1752 emitModRM(dst, src); 1753 } 1754 1755 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) { 1756 prefix(src, dst); 1757 emitByte(0x0F); 1758 emitByte(0x40 | cc.getValue()); 1759 emitOperandHelper(dst, src, 0); 1760 } 1761 1762 public final void cmpl(Register dst, int imm32) { 1763 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1764 } 1765 1766 public final void cmpl(Register dst, Register src) { 1767 CMP.rmOp.emit(this, DWORD, dst, src); 1768 } 1769 1770 public final void cmpl(Register dst, AMD64Address src) { 1771 CMP.rmOp.emit(this, DWORD, dst, src); 1772 } 1773 1774 public final void cmpl(AMD64Address dst, int imm32) { 1775 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1776 } 1777 1778 /** 1779 * The 8-bit cmpxchg compares the value at adr with the contents of rax; if they are equal, reg 1780 * is stored at adr, otherwise the value at adr is loaded into rax. The ZF flag is set if the 1781 * compared values were equal, and cleared otherwise.
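*
* <p>
* A typical lock-prefixed compare-and-swap sequence (an illustrative sketch, not from the
* original source; the address and registers are placeholders):
*
* <pre>
* // if the byte at [rsi] equals al, store the low byte of rdx there; ZF reports success
* asm.lock();
* asm.cmpxchgb(AMD64.rdx, new AMD64Address(AMD64.rsi, 0));
* </pre>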
1782 */ 1783 public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg 1784 prefixb(adr, reg); 1785 emitByte(0x0F); 1786 emitByte(0xB0); 1787 emitOperandHelper(reg, adr, 0); 1788 } 1789 1790 /** 1791 * The 16-bit cmpxchg compares the value at adr with the contents of rax; if they are equal, 1792 * reg is stored at adr, otherwise the value at adr is loaded into rax. The ZF flag is set if 1793 * the compared values were equal, and cleared otherwise. 1794 */ 1795 public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg 1796 emitByte(0x66); // Switch to 16-bit mode. 1797 prefix(adr, reg); 1798 emitByte(0x0F); 1799 emitByte(0xB1); 1800 emitOperandHelper(reg, adr, 0); 1801 } 1802 1803 /** 1804 * The 32-bit cmpxchg compares the value at adr with the contents of rax; if they are equal, 1805 * reg is stored at adr, otherwise the value at adr is loaded into rax. The ZF flag is set if 1806 * the compared values were equal, and cleared otherwise. 1807 */ 1808 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg 1809 prefix(adr, reg); 1810 emitByte(0x0F); 1811 emitByte(0xB1); 1812 emitOperandHelper(reg, adr, 0); 1813 } 1814 1815 public final void cvtsi2sdl(Register dst, Register src) { 1816 SSEOp.CVTSI2SD.emit(this, DWORD, dst, src); 1817 } 1818 1819 public final void cvttsd2sil(Register dst, Register src) { 1820 SSEOp.CVTTSD2SI.emit(this, DWORD, dst, src); 1821 } 1822 1823 public final void decl(AMD64Address dst) { 1824 prefix(dst); 1825 emitByte(0xFF); 1826 emitOperandHelper(1, dst, 0); 1827 } 1828 1829 public final void divsd(Register dst, Register src) { 1830 SSEOp.DIV.emit(this, SD, dst, src); 1831 } 1832 1833 public final void hlt() { 1834 emitByte(0xF4); 1835 } 1836 1837 public final void imull(Register dst, Register src, int value) { 1838 if (isByte(value)) { 1839 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value); 1840 } else { 1841 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value); 1842 } 1843 } 1844 1845 public final void incl(AMD64Address dst) { 1846 prefix(dst); 1847 emitByte(0xFF); 1848 emitOperandHelper(0, dst, 0); 1849 } 1850 1851 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { 1852 int shortSize = 2; 1853 int longSize = 6; 1854 long disp = jumpTarget - position(); 1855 if (!forceDisp32 && isByte(disp - shortSize)) { 1856 // 0111 tttn #8-bit disp 1857 emitByte(0x70 | cc.getValue()); 1858 emitByte((int) ((disp - shortSize) & 0xFF)); 1859 } else { 1860 // 0000 1111 1000 tttn #32-bit disp 1861 assert isInt(disp - longSize) : "must be 32bit offset (call4)"; 1862 emitByte(0x0F); 1863 emitByte(0x80 | cc.getValue()); 1864 emitInt((int) (disp - longSize)); 1865 } 1866 } 1867 1868 public final void jcc(ConditionFlag cc, Label l) { 1869 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; 1870 if (l.isBound()) { 1871 jcc(cc, l.position(), false); 1872 } else { 1873 // Note: we could eliminate conditional jumps to this jump if the condition 1874 // is the same; however, that seems to be a rather unlikely case.
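// Note: the 32-bit displacement emitted below is just a placeholder; it is fixed up
// by patchJumpTarget once the label is bound.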
1875 // Note: use jccb() if label to be bound is very close to get 1876 // an 8-bit displacement 1877 l.addPatchAt(position()); 1878 emitByte(0x0F); 1879 emitByte(0x80 | cc.getValue()); 1880 emitInt(0); 1881 } 1882 1883 } 1884 1885 public final void jccb(ConditionFlag cc, Label l) { 1886 if (l.isBound()) { 1887 int shortSize = 2; 1888 int entry = l.position(); 1889 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp"; 1890 long disp = entry - position(); 1891 // 0111 tttn #8-bit disp 1892 emitByte(0x70 | cc.getValue()); 1893 emitByte((int) ((disp - shortSize) & 0xFF)); 1894 } else { 1895 l.addPatchAt(position()); 1896 emitByte(0x70 | cc.getValue()); 1897 emitByte(0); 1898 } 1899 } 1900 1901 public final void jmp(int jumpTarget, boolean forceDisp32) { 1902 int shortSize = 2; 1903 int longSize = 5; 1904 long disp = jumpTarget - position(); 1905 if (!forceDisp32 && isByte(disp - shortSize)) { 1906 emitByte(0xEB); 1907 emitByte((int) ((disp - shortSize) & 0xFF)); 1908 } else { 1909 emitByte(0xE9); 1910 emitInt((int) (disp - longSize)); 1911 } 1912 } 1913 1914 @Override 1915 public final void jmp(Label l) { 1916 if (l.isBound()) { 1917 jmp(l.position(), false); 1918 } else { 1919 // By default, forward jumps are always 32-bit displacements, since 1920 // we can't yet know where the label will be bound. If you're sure that 1921 // the forward jump will not run beyond 256 bytes, use jmpb to 1922 // force an 8-bit displacement. 1923 1924 l.addPatchAt(position()); 1925 emitByte(0xE9); 1926 emitInt(0); 1927 } 1928 } 1929 1930 public final void jmp(Register entry) { 1931 prefix(entry); 1932 emitByte(0xFF); 1933 emitModRM(4, entry); 1934 } 1935 1936 public final void jmp(AMD64Address adr) { 1937 prefix(adr); 1938 emitByte(0xFF); 1939 emitOperandHelper(AMD64.rsp, adr, 0); 1940 } 1941 1942 public final void jmpb(Label l) { 1943 if (l.isBound()) { 1944 int shortSize = 2; 1945 int entry = l.position(); 1946 assert isByte((entry - position()) + shortSize) : "Displacement too large for a short jmp"; 1947 long offs = entry - position(); 1948 emitByte(0xEB); 1949 emitByte((int) ((offs - shortSize) & 0xFF)); 1950 } else { 1951 1952 l.addPatchAt(position()); 1953 emitByte(0xEB); 1954 emitByte(0); 1955 } 1956 } 1957 1958 public final void lead(Register dst, AMD64Address src) { 1959 prefix(src, dst); 1960 emitByte(0x8D); 1961 emitOperandHelper(dst, src, 0); 1962 } 1963 1964 public final void leaq(Register dst, AMD64Address src) { 1965 prefixq(src, dst); 1966 emitByte(0x8D); 1967 emitOperandHelper(dst, src, 0); 1968 } 1969 1970 public final void leave() { 1971 emitByte(0xC9); 1972 } 1973 1974 public final void lock() { 1975 emitByte(0xF0); 1976 } 1977 1978 public final void movapd(Register dst, Register src) { 1979 assert inRC(XMM, dst) && inRC(XMM, src); 1980 simdPrefix(dst, Register.None, src, PD, P_0F, false); 1981 emitByte(0x28); 1982 emitModRM(dst, src); 1983 } 1984 1985 public final void movaps(Register dst, Register src) { 1986 assert inRC(XMM, dst) && inRC(XMM, src); 1987 simdPrefix(dst, Register.None, src, PS, P_0F, false); 1988 emitByte(0x28); 1989 emitModRM(dst, src); 1990 } 1991 1992 public final void movb(AMD64Address dst, int imm8) { 1993 prefix(dst); 1994 emitByte(0xC6); 1995 emitOperandHelper(0, dst, 1); 1996 emitByte(imm8); 1997 } 1998 1999 public final void movb(AMD64Address dst, Register src) { 2000 assert inRC(CPU, src) : "must have byte register"; 2001 prefixb(dst, src); 2002 emitByte(0x88); 2003 emitOperandHelper(src, dst, 0); 2004 } 2005 2006 public final
void movl(Register dst, int imm32) { 2007 movl(dst, imm32, false); 2008 } 2009 2010 public final void movl(Register dst, int imm32, boolean annotateImm) { 2011 int insnPos = position(); 2012 prefix(dst); 2013 emitByte(0xB8 + encode(dst)); 2014 int immPos = position(); 2015 emitInt(imm32); 2016 int nextInsnPos = position(); 2017 if (annotateImm && codePatchingAnnotationConsumer != null) { 2018 codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 2019 } 2020 } 2021 2022 public final void movl(Register dst, Register src) { 2023 prefix(dst, src); 2024 emitByte(0x8B); 2025 emitModRM(dst, src); 2026 } 2027 2028 public final void movl(Register dst, AMD64Address src) { 2029 prefix(src, dst); 2030 emitByte(0x8B); 2031 emitOperandHelper(dst, src, 0); 2032 } 2033 2034 /** 2035 * @param wide use 4 byte encoding for displacements that would normally fit in a byte 2036 */ 2037 public final void movl(Register dst, AMD64Address src, boolean wide) { 2038 prefix(src, dst); 2039 emitByte(0x8B); 2040 emitOperandHelper(dst, src, wide, 0); 2041 } 2042 2043 public final void movl(AMD64Address dst, int imm32) { 2044 prefix(dst); 2045 emitByte(0xC7); 2046 emitOperandHelper(0, dst, 4); 2047 emitInt(imm32); 2048 } 2049 2050 public final void movl(AMD64Address dst, Register src) { 2051 prefix(dst, src); 2052 emitByte(0x89); 2053 emitOperandHelper(src, dst, 0); 2054 } 2055 2056 /** 2057 * New CPUs require use of movsd and movss to avoid partial register stall when loading from 2058 * memory. But for old Opteron use movlpd instead of movsd. The selection is done in 2059 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and 2060 * {@link AMD64MacroAssembler#movflt(Register, Register)}. 2061 */ 2062 public final void movlpd(Register dst, AMD64Address src) { 2063 assert inRC(XMM, dst); 2064 simdPrefix(dst, dst, src, PD, P_0F, false); 2065 emitByte(0x12); 2066 emitOperandHelper(dst, src, 0); 2067 } 2068 2069 public final void movlhps(Register dst, Register src) { 2070 assert inRC(XMM, dst) && inRC(XMM, src); 2071 simdPrefix(dst, src, src, PS, P_0F, false); 2072 emitByte(0x16); 2073 emitModRM(dst, src); 2074 } 2075 2076 public final void movq(Register dst, AMD64Address src) { 2077 movq(dst, src, false); 2078 } 2079 2080 public final void movq(Register dst, AMD64Address src, boolean force4BytesDisplacement) { 2081 if (inRC(XMM, dst)) { 2082 // Insn: MOVQ xmm, r/m64 2083 // Code: F3 0F 7E /r 2084 // An alternative instruction would be 66 REX.W 0F 6E /r. We prefer the REX.W free 2085 // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction 2086 // when applicable. 2087 simdPrefix(dst, Register.None, src, SS, P_0F, false); 2088 emitByte(0x7E); 2089 emitOperandHelper(dst, src, force4BytesDisplacement, 0); 2090 } else { 2091 // gpr version of movq 2092 prefixq(src, dst); 2093 emitByte(0x8B); 2094 emitOperandHelper(dst, src, force4BytesDisplacement, 0); 2095 } 2096 } 2097 2098 public final void movq(Register dst, Register src) { 2099 assert inRC(CPU, dst) && inRC(CPU, src); 2100 prefixq(dst, src); 2101 emitByte(0x8B); 2102 emitModRM(dst, src); 2103 } 2104 2105 public final void movq(AMD64Address dst, Register src) { 2106 if (inRC(XMM, src)) { 2107 // Insn: MOVQ r/m64, xmm 2108 // Code: 66 0F D6 /r 2109 // An alternative instruction would be 66 REX.W 0F 7E /r. We prefer the REX.W free 2110 // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction 2111 // when applicable. 
2112 simdPrefix(src, Register.None, dst, PD, P_0F, false); 2113 emitByte(0xD6); 2114 emitOperandHelper(src, dst, 0); 2115 } else { 2116 // gpr version of movq 2117 prefixq(dst, src); 2118 emitByte(0x89); 2119 emitOperandHelper(src, dst, 0); 2120 } 2121 } 2122 2123 public final void movsbl(Register dst, AMD64Address src) { 2124 prefix(src, dst); 2125 emitByte(0x0F); 2126 emitByte(0xBE); 2127 emitOperandHelper(dst, src, 0); 2128 } 2129 2130 public final void movsbl(Register dst, Register src) { 2131 prefix(dst, false, src, true); 2132 emitByte(0x0F); 2133 emitByte(0xBE); 2134 emitModRM(dst, src); 2135 } 2136 2137 public final void movsbq(Register dst, AMD64Address src) { 2138 prefixq(src, dst); 2139 emitByte(0x0F); 2140 emitByte(0xBE); 2141 emitOperandHelper(dst, src, 0); 2142 } 2143 2144 public final void movsbq(Register dst, Register src) { 2145 prefixq(dst, src); 2146 emitByte(0x0F); 2147 emitByte(0xBE); 2148 emitModRM(dst, src); 2149 } 2150 2151 public final void movsd(Register dst, Register src) { 2152 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2153 } 2154 2155 public final void movsd(Register dst, AMD64Address src) { 2156 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2157 } 2158 2159 public final void movsd(AMD64Address dst, Register src) { 2160 AMD64MROp.MOVSD.emit(this, SD, dst, src); 2161 } 2162 2163 public final void movss(Register dst, Register src) { 2164 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2165 } 2166 2167 public final void movss(Register dst, AMD64Address src) { 2168 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2169 } 2170 2171 public final void movss(AMD64Address dst, Register src) { 2172 AMD64MROp.MOVSS.emit(this, SS, dst, src); 2173 } 2174 2175 public final void mulpd(Register dst, Register src) { 2176 SSEOp.MUL.emit(this, PD, dst, src); 2177 } 2178 2179 public final void mulpd(Register dst, AMD64Address src) { 2180 SSEOp.MUL.emit(this, PD, dst, src); 2181 } 2182 2183 public final void mulsd(Register dst, Register src) { 2184 SSEOp.MUL.emit(this, SD, dst, src); 2185 } 2186 2187 public final void mulsd(Register dst, AMD64Address src) { 2188 SSEOp.MUL.emit(this, SD, dst, src); 2189 } 2190 2191 public final void mulss(Register dst, Register src) { 2192 SSEOp.MUL.emit(this, SS, dst, src); 2193 } 2194 2195 public final void movswl(Register dst, AMD64Address src) { 2196 prefix(src, dst); 2197 emitByte(0x0F); 2198 emitByte(0xBF); 2199 emitOperandHelper(dst, src, 0); 2200 } 2201 2202 public final void movw(AMD64Address dst, int imm16) { 2203 emitByte(0x66); // switch to 16-bit mode 2204 prefix(dst); 2205 emitByte(0xC7); 2206 emitOperandHelper(0, dst, 2); 2207 emitShort(imm16); 2208 } 2209 2210 public final void movw(AMD64Address dst, Register src) { 2211 emitByte(0x66); 2212 prefix(dst, src); 2213 emitByte(0x89); 2214 emitOperandHelper(src, dst, 0); 2215 } 2216 2217 public final void movzbl(Register dst, AMD64Address src) { 2218 prefix(src, dst); 2219 emitByte(0x0F); 2220 emitByte(0xB6); 2221 emitOperandHelper(dst, src, 0); 2222 } 2223 2224 public final void movzbl(Register dst, Register src) { 2225 AMD64RMOp.MOVZXB.emit(this, DWORD, dst, src); 2226 } 2227 2228 public final void movzbq(Register dst, Register src) { 2229 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src); 2230 } 2231 2232 public final void movzwl(Register dst, AMD64Address src) { 2233 prefix(src, dst); 2234 emitByte(0x0F); 2235 emitByte(0xB7); 2236 emitOperandHelper(dst, src, 0); 2237 } 2238 2239 public final void negl(Register dst) { 2240 NEG.emit(this, DWORD, dst); 2241 } 2242 2243 public final void notl(Register dst) { 
2244 NOT.emit(this, DWORD, dst); 2245 } 2246 2247 public final void notq(Register dst) { 2248 NOT.emit(this, QWORD, dst); 2249 } 2250 2251 @Override 2252 public final void ensureUniquePC() { 2253 nop(); 2254 } 2255 2256 public final void nop() { 2257 nop(1); 2258 } 2259 2260 public void nop(int count) { 2261 int i = count; 2262 if (UseNormalNop) { 2263 assert i > 0 : " "; 2264 // The fancy nops aren't currently recognized by debuggers, making it a 2265 // pain to disassemble code while debugging. If asserts are on, speed is 2266 // clearly not an issue, so we simply use the traditional single-byte nop 2267 // for alignment. 2268 2269 for (; i > 0; i--) { 2270 emitByte(0x90); 2271 } 2272 return; 2273 } 2274 2275 if (UseAddressNop) { 2276 // 2277 // Using multi-byte nops "0x0F 0x1F [Address]" for AMD. 2278 // 1: 0x90 2279 // 2: 0x66 0x90 2280 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2281 // 4: 0x0F 0x1F 0x40 0x00 2282 // 5: 0x0F 0x1F 0x44 0x00 0x00 2283 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2284 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2285 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2286 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2287 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2288 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2289 2290 // The remaining encodings are AMD-specific - use consecutive address nops 2291 2292 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2293 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2294 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2295 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2296 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2297 // Size prefixes (0x66) are added for larger sizes 2298 2299 while (i >= 22) { 2300 i -= 11; 2301 emitByte(0x66); // size prefix 2302 emitByte(0x66); // size prefix 2303 emitByte(0x66); // size prefix 2304 addrNop8(); 2305 } 2306 // Generate first nop for size between 21-12 2307 switch (i) { 2308 case 21: 2309 i -= 11; 2310 emitByte(0x66); // size prefix 2311 emitByte(0x66); // size prefix 2312 emitByte(0x66); // size prefix 2313 addrNop8(); 2314 break; 2315 case 20: 2316 case 19: 2317 i -= 10; 2318 emitByte(0x66); // size prefix 2319 emitByte(0x66); // size prefix 2320 addrNop8(); 2321 break; 2322 case 18: 2323 case 17: 2324 i -= 9; 2325 emitByte(0x66); // size prefix 2326 addrNop8(); 2327 break; 2328 case 16: 2329 case 15: 2330 i -= 8; 2331 addrNop8(); 2332 break; 2333 case 14: 2334 case 13: 2335 i -= 7; 2336 addrNop7(); 2337 break; 2338 case 12: 2339 i -= 6; 2340 emitByte(0x66); // size prefix 2341 addrNop5(); 2342 break; 2343 default: 2344 assert i < 12; 2345 } 2346 2347 // Generate second nop for size between 11-1 2348 switch (i) { 2349 case 11: 2350 emitByte(0x66); // size prefix 2351 emitByte(0x66); // size prefix 2352 emitByte(0x66); // size prefix 2353 addrNop8(); 2354 break; 2355 case 10: 2356 emitByte(0x66); // size prefix 2357 emitByte(0x66); // size prefix 2358 addrNop8(); 2359 break; 2360 case 9: 2361 emitByte(0x66); // size prefix 2362 addrNop8(); 2363 break; 2364 case 8: 2365 addrNop8(); 2366 break; 2367 case 7: 2368 addrNop7(); 2369 break; 2370 case 6: 2371 emitByte(0x66); // size prefix 2372 addrNop5(); 2373 break; 2374 case 5: 2375 addrNop5(); 2376 break; 2377 case 4: 2378 addrNop4(); 2379 break; 2380 case 3: 2381 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2382 emitByte(0x66);
// size prefix 2383 emitByte(0x66); // size prefix 2384 emitByte(0x90); // nop 2385 break; 2386 case 2: 2387 emitByte(0x66); // size prefix 2388 emitByte(0x90); // nop 2389 break; 2390 case 1: 2391 emitByte(0x90); // nop 2392 break; 2393 default: 2394 assert i == 0; 2395 } 2396 return; 2397 } 2398 2399 // Using nops with size prefixes "0x66 0x90". 2400 // From AMD Optimization Guide: 2401 // 1: 0x90 2402 // 2: 0x66 0x90 2403 // 3: 0x66 0x66 0x90 2404 // 4: 0x66 0x66 0x66 0x90 2405 // 5: 0x66 0x66 0x90 0x66 0x90 2406 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2407 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2408 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2409 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2410 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2411 // 2412 while (i > 12) { 2413 i -= 4; 2414 emitByte(0x66); // size prefix 2415 emitByte(0x66); 2416 emitByte(0x66); 2417 emitByte(0x90); // nop 2418 } 2419 // 1 - 12 nops 2420 if (i > 8) { 2421 if (i > 9) { 2422 i -= 1; 2423 emitByte(0x66); 2424 } 2425 i -= 3; 2426 emitByte(0x66); 2427 emitByte(0x66); 2428 emitByte(0x90); 2429 } 2430 // 1 - 8 nops 2431 if (i > 4) { 2432 if (i > 6) { 2433 i -= 1; 2434 emitByte(0x66); 2435 } 2436 i -= 3; 2437 emitByte(0x66); 2438 emitByte(0x66); 2439 emitByte(0x90); 2440 } 2441 switch (i) { 2442 case 4: 2443 emitByte(0x66); 2444 emitByte(0x66); 2445 emitByte(0x66); 2446 emitByte(0x90); 2447 break; 2448 case 3: 2449 emitByte(0x66); 2450 emitByte(0x66); 2451 emitByte(0x90); 2452 break; 2453 case 2: 2454 emitByte(0x66); 2455 emitByte(0x90); 2456 break; 2457 case 1: 2458 emitByte(0x90); 2459 break; 2460 default: 2461 assert i == 0; 2462 } 2463 } 2464 2465 public final void orl(Register dst, Register src) { 2466 OR.rmOp.emit(this, DWORD, dst, src); 2467 } 2468 2469 public final void orl(Register dst, int imm32) { 2470 OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2471 } 2472 2473 // Insn: VPACKUSWB xmm1, xmm2, xmm3/m128 2474 // ----- 2475 // Insn: VPACKUSWB xmm1, xmm1, xmm2 2476 2477 public final void packuswb(Register dst, Register src) { 2478 assert inRC(XMM, dst) && inRC(XMM, src); 2479 // Code: VEX.NDS.128.66.0F.WIG 67 /r 2480 simdPrefix(dst, dst, src, PD, P_0F, false); 2481 emitByte(0x67); 2482 emitModRM(dst, src); 2483 } 2484 2485 public final void pop(Register dst) { 2486 prefix(dst); 2487 emitByte(0x58 + encode(dst)); 2488 } 2489 2490 public void popfq() { 2491 emitByte(0x9D); 2492 } 2493 2494 public final void ptest(Register dst, Register src) { 2495 assert supports(CPUFeature.SSE4_1); 2496 assert inRC(XMM, dst) && inRC(XMM, src); 2497 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2498 emitByte(0x17); 2499 emitModRM(dst, src); 2500 } 2501 2502 public final void pcmpeqb(Register dst, Register src) { 2503 assert supports(CPUFeature.SSE2); 2504 assert inRC(XMM, dst) && inRC(XMM, src); 2505 simdPrefix(dst, dst, src, PD, P_0F, false); 2506 emitByte(0x74); 2507 emitModRM(dst, src); 2508 } 2509 2510 public final void pcmpeqw(Register dst, Register src) { 2511 assert supports(CPUFeature.SSE2); 2512 assert inRC(XMM, dst) && inRC(XMM, src); 2513 simdPrefix(dst, dst, src, PD, P_0F, false); 2514 emitByte(0x75); 2515 emitModRM(dst, src); 2516 } 2517 2518 public final void pcmpeqd(Register dst, Register src) { 2519 assert supports(CPUFeature.SSE2); 2520 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2521 simdPrefix(dst, dst, src, PD, P_0F, false); 2522 emitByte(0x76); 2523 emitModRM(dst, src); 2524 } 2525 2526 public final void 
pcmpestri(Register dst, AMD64Address src, int imm8) { 2527 assert supports(CPUFeature.SSE4_2); 2528 assert inRC(XMM, dst); 2529 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2530 emitByte(0x61); 2531 emitOperandHelper(dst, src, 0); 2532 emitByte(imm8); 2533 } 2534 2535 public final void pcmpestri(Register dst, Register src, int imm8) { 2536 assert supports(CPUFeature.SSE4_2); 2537 assert inRC(XMM, dst) && inRC(XMM, src); 2538 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2539 emitByte(0x61); 2540 emitModRM(dst, src); 2541 emitByte(imm8); 2542 } 2543 2544 public final void pmovmskb(Register dst, Register src) { 2545 assert supports(CPUFeature.SSE2); 2546 assert inRC(CPU, dst) && inRC(XMM, src); 2547 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2548 emitByte(0xD7); 2549 emitModRM(dst, src); 2550 } 2551 2552 // Insn: VPMOVZXBW xmm1, xmm2/m64 2553 2554 public final void pmovzxbw(Register dst, AMD64Address src) { 2555 assert supports(CPUFeature.SSE4_1); 2556 assert inRC(XMM, dst); 2557 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2558 emitByte(0x30); 2559 emitOperandHelper(dst, src, 0); 2560 } 2561 2562 public final void pmovzxbw(Register dst, Register src) { 2563 assert supports(CPUFeature.SSE4_1); 2564 assert inRC(XMM, dst) && inRC(XMM, src); 2565 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2566 emitByte(0x30); 2567 emitModRM(dst, src); 2568 } 2569 2570 public final void push(Register src) { 2571 prefix(src); 2572 emitByte(0x50 + encode(src)); 2573 } 2574 2575 public void pushfq() { 2576 emitByte(0x9c); 2577 } 2578 2579 public final void paddd(Register dst, Register src) { 2580 assert inRC(XMM, dst) && inRC(XMM, src); 2581 simdPrefix(dst, dst, src, PD, P_0F, false); 2582 emitByte(0xFE); 2583 emitModRM(dst, src); 2584 } 2585 2586 public final void paddq(Register dst, Register src) { 2587 assert inRC(XMM, dst) && inRC(XMM, src); 2588 simdPrefix(dst, dst, src, PD, P_0F, false); 2589 emitByte(0xD4); 2590 emitModRM(dst, src); 2591 } 2592 2593 public final void pextrw(Register dst, Register src, int imm8) { 2594 assert inRC(CPU, dst) && inRC(XMM, src); 2595 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2596 emitByte(0xC5); 2597 emitModRM(dst, src); 2598 emitByte(imm8); 2599 } 2600 2601 public final void pinsrw(Register dst, Register src, int imm8) { 2602 assert inRC(XMM, dst) && inRC(CPU, src); 2603 simdPrefix(dst, dst, src, PD, P_0F, false); 2604 emitByte(0xC4); 2605 emitModRM(dst, src); 2606 emitByte(imm8); 2607 } 2608 2609 public final void por(Register dst, Register src) { 2610 assert inRC(XMM, dst) && inRC(XMM, src); 2611 simdPrefix(dst, dst, src, PD, P_0F, false); 2612 emitByte(0xEB); 2613 emitModRM(dst, src); 2614 } 2615 2616 public final void pand(Register dst, Register src) { 2617 assert inRC(XMM, dst) && inRC(XMM, src); 2618 simdPrefix(dst, dst, src, PD, P_0F, false); 2619 emitByte(0xDB); 2620 emitModRM(dst, src); 2621 } 2622 2623 public final void pxor(Register dst, Register src) { 2624 assert inRC(XMM, dst) && inRC(XMM, src); 2625 simdPrefix(dst, dst, src, PD, P_0F, false); 2626 emitByte(0xEF); 2627 emitModRM(dst, src); 2628 } 2629 2630 public final void pslld(Register dst, int imm8) { 2631 assert isUByte(imm8) : "invalid value"; 2632 assert inRC(XMM, dst); 2633 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 2634 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2635 emitByte(0x72); 2636 emitModRM(6, dst); 2637 emitByte(imm8 & 0xFF); 2638 } 2639 2640 public final void psllq(Register dst, Register shift) { 2641 assert inRC(XMM, 
dst) && inRC(XMM, shift); 2642 simdPrefix(dst, dst, shift, PD, P_0F, false); 2643 emitByte(0xF3); 2644 emitModRM(dst, shift); 2645 } 2646 2647 public final void psllq(Register dst, int imm8) { 2648 assert isUByte(imm8) : "invalid value"; 2649 assert inRC(XMM, dst); 2650 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 2651 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2652 emitByte(0x73); 2653 emitModRM(6, dst); 2654 emitByte(imm8); 2655 } 2656 2657 public final void psrad(Register dst, int imm8) { 2658 assert isUByte(imm8) : "invalid value"; 2659 assert inRC(XMM, dst); 2660 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 2661 simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false); 2662 emitByte(0x72); 2663 emitModRM(4, dst); 2664 emitByte(imm8); 2665 } 2666 2667 public final void psrld(Register dst, int imm8) { 2668 assert isUByte(imm8) : "invalid value"; 2669 assert inRC(XMM, dst); 2670 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 2671 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2672 emitByte(0x72); 2673 emitModRM(2, dst); 2674 emitByte(imm8); 2675 } 2676 2677 public final void psrlq(Register dst, int imm8) { 2678 assert isUByte(imm8) : "invalid value"; 2679 assert inRC(XMM, dst); 2680 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 2681 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2682 emitByte(0x73); 2683 emitModRM(2, dst); 2684 emitByte(imm8); 2685 } 2686 2687 public final void psrldq(Register dst, int imm8) { 2688 assert isUByte(imm8) : "invalid value"; 2689 assert inRC(XMM, dst); 2690 simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false); 2691 emitByte(0x73); 2692 emitModRM(3, dst); 2693 emitByte(imm8); 2694 } 2695 2696 public final void pshufb(Register dst, Register src) { 2697 assert supports(CPUFeature.SSSE3); 2698 assert inRC(XMM, dst) && inRC(XMM, src); 2699 simdPrefix(dst, dst, src, PD, P_0F38, false); 2700 emitByte(0x00); 2701 emitModRM(dst, src); 2702 } 2703 2704 public final void pshuflw(Register dst, Register src, int imm8) { 2705 assert supports(CPUFeature.SSE2); 2706 assert isUByte(imm8) : "invalid value"; 2707 assert inRC(XMM, dst) && inRC(XMM, src); 2708 simdPrefix(dst, Register.None, src, SD, P_0F, false); 2709 emitByte(0x70); 2710 emitModRM(dst, src); 2711 emitByte(imm8); 2712 } 2713 2714 public final void pshufd(Register dst, Register src, int imm8) { 2715 assert isUByte(imm8) : "invalid value"; 2716 assert inRC(XMM, dst) && inRC(XMM, src); 2717 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2718 emitByte(0x70); 2719 emitModRM(dst, src); 2720 emitByte(imm8); 2721 } 2722 2723 public final void psubd(Register dst, Register src) { 2724 assert inRC(XMM, dst) && inRC(XMM, src); 2725 simdPrefix(dst, dst, src, PD, P_0F, false); 2726 emitByte(0xFA); 2727 emitModRM(dst, src); 2728 } 2729 2730 public final void punpcklbw(Register dst, Register src) { 2731 assert supports(CPUFeature.SSE2); 2732 assert inRC(XMM, dst) && inRC(XMM, src); 2733 simdPrefix(dst, dst, src, PD, P_0F, false); 2734 emitByte(0x60); 2735 emitModRM(dst, src); 2736 } 2737 2738 public final void rcpps(Register dst, Register src) { 2739 assert inRC(XMM, dst) && inRC(XMM, src); 2740 simdPrefix(dst, Register.None, src, PS, P_0F, false); 2741 emitByte(0x53); 2742 emitModRM(dst, src); 2743 } 2744 2745 public final void ret(int imm16) { 2746 if (imm16 == 0) { 2747 emitByte(0xC3); 2748 } else { 2749 emitByte(0xC2); 2750 emitShort(imm16); 2751 } 2752 } 2753 2754 public final void sarl(Register dst, int imm8) { 2755 prefix(dst); 2756 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2757 if (imm8 == 1) { 2758 
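// The shift-by-one form has a dedicated encoding (0xD1 /7) with no immediate byte.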
emitByte(0xD1); 2759 emitModRM(7, dst); 2760 } else { 2761 emitByte(0xC1); 2762 emitModRM(7, dst); 2763 emitByte(imm8); 2764 } 2765 } 2766 2767 public final void shll(Register dst, int imm8) { 2768 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2769 prefix(dst); 2770 if (imm8 == 1) { 2771 emitByte(0xD1); 2772 emitModRM(4, dst); 2773 } else { 2774 emitByte(0xC1); 2775 emitModRM(4, dst); 2776 emitByte(imm8); 2777 } 2778 } 2779 2780 public final void shll(Register dst) { 2781 // Multiply dst by 2, CL times. 2782 prefix(dst); 2783 emitByte(0xD3); 2784 emitModRM(4, dst); 2785 } 2786 2787 // Insn: SHLX r32a, r/m32, r32b 2788 2789 public final void shlxl(Register dst, Register src1, Register src2) { 2790 VexGeneralPurposeRMVOp.SHLX.emit(this, AVXSize.DWORD, dst, src1, src2); 2791 } 2792 2793 public final void shrl(Register dst, int imm8) { 2794 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2795 prefix(dst); 2796 emitByte(0xC1); 2797 emitModRM(5, dst); 2798 emitByte(imm8); 2799 } 2800 2801 public final void shrl(Register dst) { 2802 // Unsigned divide dst by 2, CL times. 2803 prefix(dst); 2804 emitByte(0xD3); 2805 emitModRM(5, dst); 2806 } 2807 2808 public final void subl(AMD64Address dst, int imm32) { 2809 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2810 } 2811 2812 public final void subl(Register dst, int imm32) { 2813 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2814 } 2815 2816 public final void subl(Register dst, Register src) { 2817 SUB.rmOp.emit(this, DWORD, dst, src); 2818 } 2819 2820 public final void subpd(Register dst, Register src) { 2821 SSEOp.SUB.emit(this, PD, dst, src); 2822 } 2823 2824 public final void subsd(Register dst, Register src) { 2825 SSEOp.SUB.emit(this, SD, dst, src); 2826 } 2827 2828 public final void subsd(Register dst, AMD64Address src) { 2829 SSEOp.SUB.emit(this, SD, dst, src); 2830 } 2831 2832 public final void testl(Register dst, int imm32) { 2833 // not using emitArith because test 2834 // doesn't support sign-extension of 2835 // 8bit operands 2836 if (dst.encoding == 0) { 2837 emitByte(0xA9); 2838 } else { 2839 prefix(dst); 2840 emitByte(0xF7); 2841 emitModRM(0, dst); 2842 } 2843 emitInt(imm32); 2844 } 2845 2846 public final void testl(Register dst, Register src) { 2847 prefix(dst, src); 2848 emitByte(0x85); 2849 emitModRM(dst, src); 2850 } 2851 2852 public final void testl(Register dst, AMD64Address src) { 2853 prefix(src, dst); 2854 emitByte(0x85); 2855 emitOperandHelper(dst, src, 0); 2856 } 2857 2858 public final void unpckhpd(Register dst, Register src) { 2859 assert inRC(XMM, dst) && inRC(XMM, src); 2860 simdPrefix(dst, dst, src, PD, P_0F, false); 2861 emitByte(0x15); 2862 emitModRM(dst, src); 2863 } 2864 2865 public final void unpcklpd(Register dst, Register src) { 2866 assert inRC(XMM, dst) && inRC(XMM, src); 2867 simdPrefix(dst, dst, src, PD, P_0F, false); 2868 emitByte(0x14); 2869 emitModRM(dst, src); 2870 } 2871 2872 public final void xorl(Register dst, Register src) { 2873 XOR.rmOp.emit(this, DWORD, dst, src); 2874 } 2875 2876 public final void xorpd(Register dst, Register src) { 2877 SSEOp.XOR.emit(this, PD, dst, src); 2878 } 2879 2880 public final void xorps(Register dst, Register src) { 2881 SSEOp.XOR.emit(this, PS, dst, src); 2882 } 2883 2884 protected final void decl(Register dst) { 2885 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2886 prefix(dst); 2887 emitByte(0xFF); 2888 emitModRM(1, dst); 2889 } 2890 2891 protected final void incl(Register dst) { 2892 
// Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2893 prefix(dst); 2894 emitByte(0xFF); 2895 emitModRM(0, dst); 2896 } 2897 2898 public final void addq(Register dst, int imm32) { 2899 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2900 } 2901 2902 public final void addq(AMD64Address dst, int imm32) { 2903 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2904 } 2905 2906 public final void addq(Register dst, Register src) { 2907 ADD.rmOp.emit(this, QWORD, dst, src); 2908 } 2909 2910 public final void addq(AMD64Address dst, Register src) { 2911 ADD.mrOp.emit(this, QWORD, dst, src); 2912 } 2913 2914 public final void andq(Register dst, int imm32) { 2915 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2916 } 2917 2918 public final void bsrq(Register dst, Register src) { 2919 prefixq(dst, src); 2920 emitByte(0x0F); 2921 emitByte(0xBD); 2922 emitModRM(dst, src); 2923 } 2924 2925 public final void bswapq(Register reg) { 2926 prefixq(reg); 2927 emitByte(0x0F); 2928 emitByte(0xC8 + encode(reg)); 2929 } 2930 2931 public final void cdqq() { 2932 rexw(); 2933 emitByte(0x99); 2934 } 2935 2936 public final void cmovq(ConditionFlag cc, Register dst, Register src) { 2937 prefixq(dst, src); 2938 emitByte(0x0F); 2939 emitByte(0x40 | cc.getValue()); 2940 emitModRM(dst, src); 2941 } 2942 2943 public final void setb(ConditionFlag cc, Register dst) { 2944 prefix(dst, true); 2945 emitByte(0x0F); 2946 emitByte(0x90 | cc.getValue()); 2947 emitModRM(0, dst); 2948 } 2949 2950 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) { 2951 prefixq(src, dst); 2952 emitByte(0x0F); 2953 emitByte(0x40 | cc.getValue()); 2954 emitOperandHelper(dst, src, 0); 2955 } 2956 2957 public final void cmpq(Register dst, int imm32) { 2958 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2959 } 2960 2961 public final void cmpq(Register dst, Register src) { 2962 CMP.rmOp.emit(this, QWORD, dst, src); 2963 } 2964 2965 public final void cmpq(Register dst, AMD64Address src) { 2966 CMP.rmOp.emit(this, QWORD, dst, src); 2967 } 2968 2969 public final void cmpxchgq(Register reg, AMD64Address adr) { 2970 prefixq(adr, reg); 2971 emitByte(0x0F); 2972 emitByte(0xB1); 2973 emitOperandHelper(reg, adr, 0); 2974 } 2975 2976 public final void cvtdq2pd(Register dst, Register src) { 2977 assert inRC(XMM, dst) && inRC(XMM, src); 2978 simdPrefix(dst, Register.None, src, SS, P_0F, false); 2979 emitByte(0xE6); 2980 emitModRM(dst, src); 2981 } 2982 2983 public final void cvtsi2sdq(Register dst, Register src) { 2984 SSEOp.CVTSI2SD.emit(this, QWORD, dst, src); 2985 } 2986 2987 public final void cvttsd2siq(Register dst, Register src) { 2988 SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src); 2989 } 2990 2991 public final void cvttpd2dq(Register dst, Register src) { 2992 assert inRC(XMM, dst) && inRC(XMM, src); 2993 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2994 emitByte(0xE6); 2995 emitModRM(dst, src); 2996 } 2997 2998 public final void decq(Register dst) { 2999 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3000 prefixq(dst); 3001 emitByte(0xFF); 3002 emitModRM(1, dst); 3003 } 3004 3005 public final void decq(AMD64Address dst) { 3006 DEC.emit(this, QWORD, dst); 3007 } 3008 3009 public final void imulq(Register dst, Register src) { 3010 prefixq(dst, src); 3011 emitByte(0x0F); 3012 emitByte(0xAF); 3013 emitModRM(dst, src); 3014 } 3015 3016 public final void incq(Register dst) { 3017 // Don't use it directly.
Use the macro incrementq() instead. 3018 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3019 prefixq(dst); 3020 emitByte(0xFF); 3021 emitModRM(0, dst); 3022 } 3023 3024 public final void incq(AMD64Address dst) { 3025 INC.emit(this, QWORD, dst); 3026 } 3027 3028 public final void movq(Register dst, long imm64) { 3029 movq(dst, imm64, false); 3030 } 3031 3032 public final void movq(Register dst, long imm64, boolean annotateImm) { 3033 int insnPos = position(); 3034 prefixq(dst); 3035 emitByte(0xB8 + encode(dst)); 3036 int immPos = position(); 3037 emitLong(imm64); 3038 int nextInsnPos = position(); 3039 if (annotateImm && codePatchingAnnotationConsumer != null) { 3040 codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 3041 } 3042 } 3043 3044 public final void movslq(Register dst, int imm32) { 3045 prefixq(dst); 3046 emitByte(0xC7); 3047 emitModRM(0, dst); 3048 emitInt(imm32); 3049 } 3050 3051 public final void movdq(Register dst, AMD64Address src) { 3052 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3053 } 3054 3055 public final void movdq(AMD64Address dst, Register src) { 3056 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3057 } 3058 3059 public final void movdq(Register dst, Register src) { 3060 if (inRC(XMM, dst) && inRC(CPU, src)) { 3061 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3062 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3063 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3064 } else { 3065 throw new InternalError("should not reach here"); 3066 } 3067 } 3068 3069 public final void movdl(Register dst, Register src) { 3070 if (inRC(XMM, dst) && inRC(CPU, src)) { 3071 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3072 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3073 AMD64MROp.MOVD.emit(this, DWORD, dst, src); 3074 } else { 3075 throw new InternalError("should not reach here"); 3076 } 3077 } 3078 3079 public final void movdl(Register dst, AMD64Address src) { 3080 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3081 } 3082 3083 public final void movddup(Register dst, Register src) { 3084 assert supports(CPUFeature.SSE3); 3085 assert inRC(XMM, dst) && inRC(XMM, src); 3086 simdPrefix(dst, Register.None, src, SD, P_0F, false); 3087 emitByte(0x12); 3088 emitModRM(dst, src); 3089 } 3090 3091 public final void movdqu(Register dst, AMD64Address src) { 3092 assert inRC(XMM, dst); 3093 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3094 emitByte(0x6F); 3095 emitOperandHelper(dst, src, 0); 3096 } 3097 3098 public final void movdqu(Register dst, Register src) { 3099 assert inRC(XMM, dst) && inRC(XMM, src); 3100 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3101 emitByte(0x6F); 3102 emitModRM(dst, src); 3103 } 3104 3105 // Insn: VMOVDQU xmm2/m128, xmm1 3106 3107 public final void movdqu(AMD64Address dst, Register src) { 3108 assert inRC(XMM, src); 3109 // Code: VEX.128.F3.0F.WIG 7F /r 3110 simdPrefix(src, Register.None, dst, SS, P_0F, false); 3111 emitByte(0x7F); 3112 emitOperandHelper(src, dst, 0); 3113 } 3114 3115 public final void movslq(AMD64Address dst, int imm32) { 3116 prefixq(dst); 3117 emitByte(0xC7); 3118 emitOperandHelper(0, dst, 4); 3119 emitInt(imm32); 3120 } 3121 3122 public final void movslq(Register dst, AMD64Address src) { 3123 prefixq(src, dst); 3124 emitByte(0x63); 3125 emitOperandHelper(dst, src, 0); 3126 } 3127 3128 public final void movslq(Register dst, Register src) { 3129 prefixq(dst, src); 3130 emitByte(0x63); 3131 emitModRM(dst, src); 3132 } 3133 3134 public final void
    public final void negq(Register dst) {
        prefixq(dst);
        emitByte(0xF7);
        emitModRM(3, dst);
    }

    public final void orq(Register dst, Register src) {
        OR.rmOp.emit(this, QWORD, dst, src);
    }

    public final void shlq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(4, dst);
        } else {
            emitByte(0xC1);
            emitModRM(4, dst);
            emitByte(imm8);
        }
    }

    public final void shlq(Register dst) {
        // Multiply dst by 2, CL times.
        prefixq(dst);
        emitByte(0xD3);
        emitModRM(4, dst);
    }

    public final void shrq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(5, dst);
        } else {
            emitByte(0xC1);
            emitModRM(5, dst);
            emitByte(imm8);
        }
    }

    public final void shrq(Register dst) {
        // Unsigned divide dst by 2, CL times.
        prefixq(dst);
        emitByte(0xD3);
        emitModRM(5, dst);
    }

    public final void sbbq(Register dst, Register src) {
        SBB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void subq(Register dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subq(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subqWide(Register dst, int imm32) {
        // Don't use the sign-extending imm8 version; force a 32-bit immediate.
        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
    }

    public final void subq(Register dst, Register src) {
        SUB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void testq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x85);
        emitModRM(dst, src);
    }

    public final void btrq(Register src, int imm8) {
        prefixq(src);
        emitByte(0x0F);
        emitByte(0xBA);
        emitModRM(6, src);
        emitByte(imm8);
    }

    public final void xaddb(AMD64Address dst, Register src) {
        prefixb(dst, src);
        emitByte(0x0F);
        emitByte(0xC0);
        emitOperandHelper(src, dst, 0);
    }
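
    /*
     * Illustrative sketch (not part of the original file): isShiftCount itself checks the range
     * 0..31, so the assert isShiftCount(imm8 >> 1) above admits counts 0..63 for the 64-bit
     * shift forms. The one-bit shift also has its own opcode:
     *
     *   asm.shlq(rax, 1);   // REX.W D1 /4    -> 48 D1 E0
     *   asm.shlq(rax, 5);   // REX.W C1 /4 ib -> 48 C1 E0 05
     */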
    public final void xaddw(AMD64Address dst, Register src) {
        emitByte(0x66); // Switch to 16-bit mode.
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddq(AMD64Address dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xchgb(Register dst, AMD64Address src) {
        prefixb(src, dst);
        emitByte(0x86);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgw(Register dst, AMD64Address src) {
        emitByte(0x66);
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void membar(int barriers) {
        if (target.isMP) {
            // We only have to handle StoreLoad.
            if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions which suffice as barriers and
                // are much faster than the alternative of using the cpuid instruction. Here we
                // use a locked "add [rsp], 0", which is conveniently otherwise a no-op except
                // for blowing the flags.
                // Any change to this code may need to revisit other places in the code where
                // this idiom is used, in particular the orderAccess code.
                lock();
                addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here.
            }
        }
    }

    @Override
    protected final void patchJumpTarget(int branch, int branchTarget) {
        int op = getByte(branch);
        assert op == 0xE8 // call
                        || op == 0x00 // jump table entry
                        || op == 0xE9 // jmp
                        || op == 0xEB // short jmp
                        || (op & 0xF0) == 0x70 // short jcc
                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
                        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

        if (op == 0x00) {
            int offsetToJumpTableBase = getShort(branch + 1);
            int jumpTableBase = branch - offsetToJumpTableBase;
            int imm32 = branchTarget - jumpTableBase;
            emitInt(imm32, branch);
        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
            // Short offset operators (jmp and jcc).
            final int imm8 = branchTarget - (branch + 2);
            /*
             * Since a wrongly patched short branch can potentially lead to code that works but
             * behaves really badly, we always fail with an exception here instead of relying on
             * an assert.
             */
            if (!NumUtil.isByte(imm8)) {
                throw new InternalError("branch displacement out of range: " + imm8);
            }
            emitByte(imm8, branch + 1);
        } else {
            int off = 1;
            if (op == 0x0F) {
                off = 2;
            }
            int imm32 = branchTarget - (branch + 4 + off);
            emitInt(imm32, branch + off);
        }
    }

    public void nullCheck(AMD64Address address) {
        testl(AMD64.rax, address);
    }

    @Override
    public void align(int modulus) {
        if (position() % modulus != 0) {
            nop(modulus - (position() % modulus));
        }
    }
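
    /*
     * Illustrative sketch (not part of the original file): on a multiprocessor target,
     * membar(STORE_LOAD) emits "lock addl [rsp], 0", which should encode as the bytes
     * F0 83 04 24 00. The locked read-modify-write of the top-of-stack word acts as a full
     * StoreLoad fence without changing any register (only the flags).
     */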
    /**
     * Emits a direct call instruction. Note that the actual call target is not specified, because
     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
     * responsible for adding the call address to the appropriate patching tables.
     */
    public final void call() {
        annotatePatchingImmediate(1, 4);
        emitByte(0xE8);
        emitInt(0);
    }

    public final void call(Register src) {
        prefix(src);
        emitByte(0xFF);
        emitModRM(2, src);
    }

    public final void int3() {
        emitByte(0xCC);
    }

    public final void pause() {
        emitByte(0xF3);
        emitByte(0x90);
    }

    private void emitx87(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal stack offset";
        emitByte(b1);
        emitByte(b2 + i);
    }

    public final void fldd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(0, src, 0);
    }

    public final void flds(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(0, src, 0);
    }

    public final void fldln2() {
        emitByte(0xD9);
        emitByte(0xED);
    }

    public final void fldlg2() {
        emitByte(0xD9);
        emitByte(0xEC);
    }

    public final void fyl2x() {
        emitByte(0xD9);
        emitByte(0xF1);
    }

    public final void fstps(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(3, src, 0);
    }

    public final void fstpd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(3, src, 0);
    }

    private void emitFPUArith(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
        emitByte(b1);
        emitByte(b2 + i);
    }

    public void ffree(int i) {
        emitFPUArith(0xDD, 0xC0, i);
    }

    public void fincstp() {
        emitByte(0xD9);
        emitByte(0xF7);
    }

    public void fxch(int i) {
        emitFPUArith(0xD9, 0xC8, i);
    }

    public void fnstswAX() {
        emitByte(0xDF);
        emitByte(0xE0);
    }

    public void fwait() {
        emitByte(0x9B);
    }

    public void fprem() {
        emitByte(0xD9);
        emitByte(0xF8);
    }

    public final void fsin() {
        emitByte(0xD9);
        emitByte(0xFE);
    }

    public final void fcos() {
        emitByte(0xD9);
        emitByte(0xFF);
    }

    public final void fptan() {
        emitByte(0xD9);
        emitByte(0xF2);
    }

    public final void fstp(int i) {
        emitx87(0xDD, 0xD8, i);
    }

    @Override
    public AMD64Address makeAddress(Register base, int displacement) {
        return new AMD64Address(base, displacement);
    }

    @Override
    public AMD64Address getPlaceholder(int instructionStartPosition) {
        return new AMD64Address(AMD64.rip, Register.None, Scale.Times1, 0, instructionStartPosition);
    }
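
    /*
     * Illustrative sketch (not part of the original file): the x87 helpers encode a stack
     * register st(i) by adding i to the second opcode byte. For example, fstp(1) goes through
     * emitx87(0xDD, 0xD8, 1) and emits DD D9, i.e. "fstp st(1)", and fxch(2) emits D9 CA.
     */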
    private void prefetchPrefix(AMD64Address src) {
        prefix(src);
        emitByte(0x0F);
    }

    public void prefetchnta(AMD64Address src) {
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(0, src, 0);
    }

    void prefetchr(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(0, src, 0);
    }

    public void prefetcht0(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(1, src, 0);
    }

    public void prefetcht1(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(2, src, 0);
    }

    public void prefetcht2(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(3, src, 0);
    }

    public void prefetchw(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(1, src, 0);
    }

    public void rdtsc() {
        emitByte(0x0F);
        emitByte(0x31);
    }

    /**
     * Emits an instruction which is considered to be illegal. This is used if we deliberately want
     * to crash the program (debugging etc.).
     */
    public void illegal() {
        emitByte(0x0F);
        emitByte(0x0B);
    }

    public void lfence() {
        emitByte(0x0F);
        emitByte(0xAE);
        emitByte(0xE8);
    }

    public final void vptest(Register dst, Register src) {
        VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpxor(Register dst, Register nds, Register src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vpxor(Register dst, Register nds, AMD64Address src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vmovdqu(AMD64Address dst, Register src) {
        assert inRC(XMM, src);
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX2);
        VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vzeroupper() {
        emitVEX(L128, P_, M_0F, W0, 0, 0, true);
        emitByte(0x77);
    }

    // Insn: KORTESTD k1, k2

    // This instruction sets ZF and CF based on the OR of the two mask operands.
    public final void kortestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }

    // Insn: KORTESTQ k1, k2

    // This instruction sets ZF and CF based on the OR of the two mask operands.
    public final void kortestq(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }
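
    /*
     * Illustrative sketch (not part of the original file): KORTEST is typically used to test a
     * comparison mask. ZF is set iff the OR of the two masks is all zeros (no lane matched) and
     * CF iff it is all ones (every lane matched). Register names and the label are illustrative;
     * jcc is defined elsewhere in this class:
     *
     *   asm.evpcmpeqb(k1, xmm0, address);            // k1 gets one bit per byte lane
     *   asm.kortestq(k1, k1);                        // ZF set -> no byte matched
     *   asm.jcc(ConditionFlag.Zero, noMatchLabel);
     */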
    public final void kmovd(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovd(KRegister dst, KRegister src):
                // Insn: KMOVD k1, k2/m32
                // Code: VEX.L0.66.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovd(KRegister dst, Register src)
                // Insn: KMOVD k1, r32
                // Code: VEX.L0.F2.0F.W0 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovd(Register dst, KRegister src)
                // Insn: KMOVD r32, k1
                // Code: VEX.L0.F2.0F.W0 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    public final void kmovq(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovq(KRegister dst, KRegister src):
                // Insn: KMOVQ k1, k2/m64
                // Code: VEX.L0.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovq(KRegister dst, Register src)
                // Insn: KMOVQ k1, r64
                // Code: VEX.L0.F2.0F.W1 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovq(Register dst, KRegister src)
                // Insn: KMOVQ r64, k1
                // Code: VEX.L0.F2.0F.W1 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    // Insn: KTESTD k1, k2

    public final void ktestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 99 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x99);
        emitModRM(src1, src2);
    }

    public final void evmovdqu64(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512F);
        assert inRC(XMM, dst);
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1, m256

    public final void evpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(XMM, nds);
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
        emitByte(0x74);
        emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
    // -----
    // Insn: VMOVDQU16 zmm1, m512

    public final void evmovdqu16(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
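
    /*
     * Illustrative sketch (not part of the original file): EVEX encodes memory displacements as
     * disp8 * N, where N is the tuple-dependent scaling factor passed to emitEVEXOperandHelper.
     * For a full-vector (FVM) ZMM access N is 64, so a displacement of 128 fits in the single
     * byte 2, while a displacement such as 100 (not a multiple of 64) falls back to disp32.
     */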
    // Insn: VMOVDQU16 zmm1, k1:z, m512

    public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(MASK, mask);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
    // -----
    // Insn: VMOVDQU16 m512, zmm1

    public final void evmovdqu16(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 m512, k1, zmm1

    public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
    // -----
    // Insn: VPBROADCASTW zmm1, reg

    public final void evpbroadcastw(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(CPU, src);
        // Code: EVEX.512.66.0F38.W0 7B /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
        emitByte(0x7B);
        emitModRM(dst, src);
    }

    // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
    // -----
    // Insn: VPCMPUW k1, zmm2, zmm3, imm8

    public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src);
        // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
        emitByte(0x3E);
        emitModRM(kdst, src);
        emitByte(vcc);
    }

    // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
    // -----
    // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8

    public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(MASK, mask);
        assert inRC(XMM, nds) && inRC(XMM, src);
        // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
        evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
        emitByte(0x3E);
        emitModRM(kdst, src);
        emitByte(vcc);
    }

    // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
    // -----
    // Insn: VPMOVWB m256, zmm2

    public final void evpmovwb(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
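
    /*
     * Illustrative sketch (not part of the original file): the vcc immediate of VPCMPUW selects
     * the unsigned comparison predicate: 0 = eq, 1 = lt, 2 = le, 4 = neq, 5 = nlt (ge),
     * 6 = nle (gt). So evpcmpuw(k1, zmm2, zmm3, 2) sets bit i of k1 iff word i of zmm2 is
     * unsigned-<= word i of zmm3 (register names illustrative).
     */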
    // Insn: VPMOVWB m256, k1, zmm2

    public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
    // -----
    // Insn: VPMOVZXBW zmm1, k1, m256

    public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
}
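
/*
 * Illustrative usage sketch (not part of the original file). `target` is an assumed valid
 * TargetDescription, and rax comes from jdk.vm.ci.amd64.AMD64; close(boolean) is inherited
 * from the Assembler superclass:
 *
 *   AMD64Assembler asm = new AMD64Assembler(target);
 *   asm.movq(AMD64.rax, 0x1234567890ABCDEFL); // 48 B8 EF CD AB 90 78 56 34 12
 *   asm.addq(AMD64.rax, 1);                   // 48 83 C0 01
 *   byte[] code = asm.close(true);
 */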