/*
 * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends AMD64BaseAssembler {

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }
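
    // A minimal usage sketch (assuming a JVMCI TargetDescription named `target` and the
    // register constants jdk.vm.ci.amd64.AMD64.rax/rbx): the assembler is driven through
    // the static opcode tables defined below, then the code buffer is retrieved.
    //
    //   AMD64Assembler asm = new AMD64Assembler(target);
    //   AMD64BinaryArithmetic.ADD.getRMOpcode(DWORD).emit(asm, DWORD, rax, rbx); // add eax, ebx
    //   byte[] code = asm.close(true); // trimmed copy of the emitted machine code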

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }

    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
        WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
        WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
        QwordAssertion(CPU, CPU, QWORD),
        FloatAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
        FloatToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }
    }

    protected static final int P_0F = 0x0F;
    protected static final int P_0F38 = 0x380F;
    protected static final int P_0F3A = 0x3A0F;

    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.getSizePrefix() != 0) {
                asm.emitByte(size.getSizePrefix());
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        public OperandSize[] getAllowedSizes() {
            return assertion.allowedSizes;
        }

        protected final boolean isSSEInstruction() {
            if (feature == null) {
                return false;
            }
            switch (feature) {
                case SSE:
                case SSE2:
                case SSE3:
                case SSSE3:
                case SSE4A:
                case SSE4_1:
                case SSE4_2:
                    return true;
                default:
                    return false;
            }
        }

        public final OpAssertion getAssertion() {
            return assertion;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }
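
    // Worked example of emitOpcode (a sketch, not tied to any particular caller): for NEG
    // with size QWORD on r9, rxb contributes REX.B for the extended register and QWORD sets
    // REX.W, so 0x49 0xF7 is emitted, followed by a ModRM byte 0xD9 built from mod=11,
    // reg=3 (the NEG extension) and rm=r9.encoding & 7; i.e. `neg r9` assembles to 49 F7 D9.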

    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            this(opcode, immIsByte, prefix, op, assertion, null);
        }

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, 0, prefix, op, assertion, feature);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.getBytes();
            }
        }
    }

    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL", P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
        public static final AMD64RMOp BSF    = new AMD64RMOp("BSF", P_0F, 0xBC);
        public static final AMD64RMOp BSR    = new AMD64RMOp("BSR", P_0F, 0xBD);
        // POPCNT, TZCNT, and LZCNT support the 16-bit (word) operation. However, the legacy
        // operand size prefix would then have to be emitted before the mandatory prefix 0xF3.
        // Since we never emit bit counts for 16-bit operands, we simply use
        // DwordOrLargerAssertion here.
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, OpAssertion.DwordOrLargerAssertion, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, OpAssertion.DwordOrLargerAssertion, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, OpAssertion.DwordOrLargerAssertion, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordAssertion);
        public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV    = new AMD64RMOp("MOV", 0x8B);
        public static final AMD64RMOp CMP    = new AMD64RMOp("CMP", 0x3B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with a different operand size prefix.
        public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);

        // TEST is documented as an MR operation, but it is symmetric, and using it as an RM operation is more convenient.
        public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST   = new AMD64RMOp("TEST", 0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x10:
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
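
    // Sketch of the RM vs. MR distinction (assuming JVMCI register constants rax/rsp):
    // an RM op reads from its second operand, an MR op (below) writes to its first, e.g.
    //   AMD64RMOp.MOV.emit(asm, QWORD, rax, new AMD64Address(rsp, 8)); // mov rax, [rsp + 8]
    //   AMD64MROp.MOV.emit(asm, QWORD, new AMD64Address(rsp, 8), rax); // mov [rsp + 8], rax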

    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB  = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV   = new AMD64MROp("MOV", 0x89);

        // MOVD and MOVQ are the same opcode, just with a different operand size prefix.
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatAssertion must be used.
        public static final AMD64MROp MOVD  = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ  = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with a different operand size prefix.
        public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x11:
                        if ((size == SS) || (size == SD)) {
                            nds = src;
                        }
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(src, nds, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(src, dst);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, src, null);
            if (isSSEInstruction()) {
                asm.simdPrefix(src, Register.None, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
            }
            asm.emitOperandHelper(src, dst, 0);
        }
    }

    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT  = new AMD64MOp("NOT", 0xF7, 2);
        public static final AMD64MOp NEG  = new AMD64MOp("NEG", 0xF7, 3);
        public static final AMD64MOp MUL  = new AMD64MOp("MUL", 0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV  = new AMD64MOp("DIV", 0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC  = new AMD64MOp("INC", 0xFF, 0);
        public static final AMD64MOp DEC  = new AMD64MOp("DEC", 0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP  = new AMD64MOp("POP", 0x8F, 0, OpAssertion.WordOrDwordAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }
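
    // Sketch: M ops share an opcode byte and are distinguished by the `ext` value encoded
    // in the reg field of the ModRM byte, e.g.
    //   AMD64MOp.NEG.emit(asm, DWORD, rax);                   // neg eax  (F7 /3)
    //   AMD64MOp.NOT.emit(asm, QWORD, new AMD64Address(rsp)); // not qword ptr [rsp]  (F7 /2)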

    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV  = new AMD64MIOp("MOV", false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm, boolean annotateImm) {
            assert verify(asm, size, dst, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm, boolean annotateImm) {
            assert verify(asm, size, null, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }
    }
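
    // Sketch: MI ops pair the ModRM-extension encoding with a trailing immediate. Passing
    // annotateImm = true additionally reports the immediate's position so a later phase can
    // patch it in place, e.g.
    //   AMD64MIOp.MOV.emit(asm, DWORD, rax, 0x1234);       // mov eax, 0x1234
    //   AMD64MIOp.TEST.emit(asm, DWORD, rax, 0xFF, true);  // test eax, 0xFF (immediate annotated)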

    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of round, as the operation is always treated with a single variant
     * input, making its extension to 3-address forms redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion, null);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, immIsByte, prefix, op, assertion, feature);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
            }
            asm.emitOperandHelper(dst, src, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }

    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp UCOMIS    = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
        public static final SSEOp SQRT      = new SSEOp("SQRT", P_0F, 0x51);
        public static final SSEOp AND       = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ANDN      = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final SSEOp OR        = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final SSEOp XOR       = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ADD       = new SSEOp("ADD", P_0F, 0x58);
        public static final SSEOp MUL       = new SSEOp("MUL", P_0F, 0x59);
        public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB       = new SSEOp("SUB", P_0F, 0x5C);
        public static final SSEOp MIN       = new SSEOp("MIN", P_0F, 0x5D);
        public static final SSEOp DIV       = new SSEOp("DIV", P_0F, 0x5E);
        public static final SSEOp MAX       = new SSEOp("MAX", P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }

    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR", 1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }
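
    // Sketch: callers pick the encoding based on the immediate's range; the 0x83 form
    // sign-extends an 8-bit immediate and saves three bytes for small values, e.g.
    //   AMD64BinaryArithmetic.ADD.getMIOpcode(DWORD, isByte(imm)).emit(asm, DWORD, rax, imm);
    //   AMD64BinaryArithmetic.CMP.getRMOpcode(QWORD).emit(asm, QWORD, rax, rbx); // cmp rax, rbx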

    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }
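
    // Sketch: the three fields correspond to the shift-by-1, shift-by-CL and
    // shift-by-immediate encodings of the same logical operation, e.g.
    //   AMD64Shift.SHL.miOp.emit(asm, DWORD, rax, 4); // shl eax, 4
    //   AMD64Shift.SAR.mcOp.emit(asm, DWORD, rax);    // sar eax, cl
    //   AMD64Shift.ROR.m1Op.emit(asm, QWORD, rax);    // ror rax, 1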

    private enum VEXOpAssertion {
        AVX1(CPUFeature.AVX, CPUFeature.AVX),
        AVX1_2(CPUFeature.AVX, CPUFeature.AVX2),
        AVX2(CPUFeature.AVX2, CPUFeature.AVX2),
        AVX1_128ONLY(CPUFeature.AVX, null),
        AVX1_256ONLY(null, CPUFeature.AVX),
        AVX2_256ONLY(null, CPUFeature.AVX2),
        XMM_CPU(CPUFeature.AVX, null, XMM, null, CPU, null),
        XMM_XMM_CPU(CPUFeature.AVX, null, XMM, XMM, CPU, null),
        CPU_XMM(CPUFeature.AVX, null, CPU, null, XMM, null),
        AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, CPU, null, XMM, null),
        BMI1(CPUFeature.BMI1, null, CPU, CPU, CPU, null),
        BMI2(CPUFeature.BMI2, null, CPU, CPU, CPU, null);

        private final CPUFeature l128feature;
        private final CPUFeature l256feature;

        private final RegisterCategory rCategory;
        private final RegisterCategory vCategory;
        private final RegisterCategory mCategory;
        private final RegisterCategory imm8Category;

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature) {
            this(l128feature, l256feature, XMM, XMM, XMM, XMM);
        }

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory, RegisterCategory imm8Category) {
            this.l128feature = l128feature;
            this.l256feature = l256feature;
            this.rCategory = rCategory;
            this.vCategory = vCategory;
            this.mCategory = mCategory;
            this.imm8Category = imm8Category;
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
            return check(arch, getLFlag(size), r, v, m, null);
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
            return check(arch, getLFlag(size), r, v, m, imm8);
        }

        public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
            switch (l) {
                case L128:
                    assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
                    break;
                case L256:
                    assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";
                    break;
            }
            if (r != null) {
                assert r.getRegisterCategory().equals(rCategory);
            }
            if (v != null) {
                assert v.getRegisterCategory().equals(vCategory);
            }
            if (m != null) {
                assert m.getRegisterCategory().equals(mCategory);
            }
            if (imm8 != null) {
                assert imm8.getRegisterCategory().equals(imm8Category);
            }
            return true;
        }

        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
            switch (avxSize) {
                case XMM:
                    return l128feature != null && features.contains(l128feature);
                case YMM:
                    return l256feature != null && features.contains(l256feature);
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }
    }

    /**
     * Base class for VEX-encoded instructions.
     */
    public static class VexOp {
        protected final int pp;
        protected final int mmmmm;
        protected final int w;
        protected final int op;

        private final String opcode;
        protected final VEXOpAssertion assertion;

        protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            this.pp = pp;
            this.mmmmm = mmmmm;
            this.w = w;
            this.op = op;
            this.opcode = opcode;
            this.assertion = assertion;
        }

        public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
            return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size);
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
     */
    public static class VexRROp extends VexOp {
        // @formatter:off
        public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
        // @formatter:on

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            // Guard against register-register encodings of VBROADCASTF128/VBROADCASTI128
            // (0x1A and 0x5A in the 0F38 map), which only exist with a memory source.
            assert !((op == 0x1A || op == 0x5A) && mmmmm == M_0F38);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
        }
    }
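
    // Sketch: feature checks are the caller's responsibility for VEX ops; e.g. for the
    // VexRMOp table that follows (using jdk.vm.ci.amd64.AMD64.xmm0/xmm1, which name the
    // full vector registers):
    //   if (VexRMOp.VPTEST.isSupported(asm, AVXSize.YMM)) {
    //       VexRMOp.VPTEST.emit(asm, AVXSize.YMM, xmm0, xmm1); // 256-bit vptest
    //   }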

    /**
     * VEX-encoded instructions with an operand order of RM.
     */
    public static class VexRMOp extends VexRROp {
        // @formatter:off
        public static final VexRMOp VCVTTSS2SI      = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSS2SQ      = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SI      = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SQ      = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTPS2PD       = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTPD2PS       = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTDQ2PS       = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPS2DQ      = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPD2DQ      = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
        public static final VexRMOp VCVTDQ2PD       = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
        public static final VexRMOp VBROADCASTSS    = new VexRMOp("VBROADCASTSS", P_66, M_0F38, W0, 0x18);
        public static final VexRMOp VBROADCASTSD    = new VexRMOp("VBROADCASTSD", P_66, M_0F38, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VBROADCASTF128  = new VexRMOp("VBROADCASTF128", P_66, M_0F38, W0, 0x1A, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VPBROADCASTI128 = new VexRMOp("VPBROADCASTI128", P_66, M_0F38, W0, 0x5A, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMOp VPBROADCASTB    = new VexRMOp("VPBROADCASTB", P_66, M_0F38, W0, 0x78, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTW    = new VexRMOp("VPBROADCASTW", P_66, M_0F38, W0, 0x79, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTD    = new VexRMOp("VPBROADCASTD", P_66, M_0F38, W0, 0x58, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTQ    = new VexRMOp("VPBROADCASTQ", P_66, M_0F38, W0, 0x59, VEXOpAssertion.AVX2);
        public static final VexRMOp VPMOVMSKB       = new VexRMOp("VPMOVMSKB", P_66, M_0F, WIG, 0xD7, VEXOpAssertion.AVX1_2_CPU_XMM);
        public static final VexRMOp VPMOVSXBW       = new VexRMOp("VPMOVSXBW", P_66, M_0F38, WIG, 0x20);
        public static final VexRMOp VPMOVSXBD       = new VexRMOp("VPMOVSXBD", P_66, M_0F38, WIG, 0x21);
        public static final VexRMOp VPMOVSXBQ       = new VexRMOp("VPMOVSXBQ", P_66, M_0F38, WIG, 0x22);
        public static final VexRMOp VPMOVSXWD       = new VexRMOp("VPMOVSXWD", P_66, M_0F38, WIG, 0x23);
        public static final VexRMOp VPMOVSXWQ       = new VexRMOp("VPMOVSXWQ", P_66, M_0F38, WIG, 0x24);
        public static final VexRMOp VPMOVSXDQ       = new VexRMOp("VPMOVSXDQ", P_66, M_0F38, WIG, 0x25);
        public static final VexRMOp VPMOVZXBW       = new VexRMOp("VPMOVZXBW", P_66, M_0F38, WIG, 0x30);
        public static final VexRMOp VPMOVZXBD       = new VexRMOp("VPMOVZXBD", P_66, M_0F38, WIG, 0x31);
        public static final VexRMOp VPMOVZXBQ       = new VexRMOp("VPMOVZXBQ", P_66, M_0F38, WIG, 0x32);
        public static final VexRMOp VPMOVZXWD       = new VexRMOp("VPMOVZXWD", P_66, M_0F38, WIG, 0x33);
        public static final VexRMOp VPMOVZXWQ       = new VexRMOp("VPMOVZXWQ", P_66, M_0F38, WIG, 0x34);
        public static final VexRMOp VPMOVZXDQ       = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
        public static final VexRMOp VPTEST          = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
        public static final VexRMOp VSQRTPD         = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTPS         = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSD         = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSS         = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
        public static final VexRMOp VUCOMISS        = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
        public static final VexRMOp VUCOMISD        = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
        // @formatter:on

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }

    /**
     * VEX-encoded move instructions.
     * <p>
     * These instructions have two opcodes: op is the forward move instruction with an operand
     * order of RM, and opReverse is the reverse move instruction with an operand order of MR.
     */
    public static final class VexMoveOp extends VexRMOp {
        // @formatter:off
        public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSS  = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSD  = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVD   = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        public static final VexMoveOp VMOVQ   = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private final int opReverse;

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }

        public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitModRM(src, dst);
        }
    }
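
    // Sketch: the same VexMoveOp handles loads through the inherited RM form and stores
    // through the MR (address) overload above, e.g.
    //   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, xmm0, new AMD64Address(rsi)); // 256-bit load
    //   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, new AMD64Address(rdi), xmm0); // 256-bit store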

    public interface VexRRIOp {
        void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
    }

    /**
     * VEX-encoded instructions with an operand order of RMI.
     */
    public static final class VexRMIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexRMIOp VPERMQ   = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFD  = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 1);
            asm.emitByte(imm8);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of MRI.
     */
    public static final class VexMRIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
        public static final VexMRIOp VPEXTRB      = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRW      = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRD      = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRQ      = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(src, dst);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(src, dst, 1);
            asm.emitByte(imm8);
        }
    }
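
    // Sketch: MRI ops write to their first (M) operand and take an immediate selector, e.g.
    //   VexMRIOp.VEXTRACTI128.emit(asm, AVXSize.YMM, xmm1, xmm0, 1); // upper 128 bits of ymm0 -> xmm1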

    /**
     * VEX-encoded instructions with an operand order of RVMR.
     */
    public static class VexRVMROp extends VexOp {
        // @formatter:off
        public static final VexRVMROp VPBLENDVB  = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
        public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
        public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
        // @formatter:on

        protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(mask.encoding() << 4);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
            asm.emitByte(mask.encoding() << 4);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RVM.
     */
    public static class VexRVMOp extends VexOp {
        // @formatter:off
        public static final VexRVMOp VANDPS    = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDPD    = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDNPS   = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
        public static final VexRVMOp VANDNPD   = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
        public static final VexRVMOp VORPS     = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
        public static final VexRVMOp VORPD     = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
        public static final VexRVMOp VXORPS    = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
        public static final VexRVMOp VXORPD    = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
        public static final VexRVMOp VADDPS    = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDPD    = new VexRVMOp("VADDPD", P_66, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSS    = new VexRVMOp("VADDSS", P_F3, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSD    = new VexRVMOp("VADDSD", P_F2, M_0F, WIG, 0x58);
        public static final VexRVMOp VMULPS    = new VexRVMOp("VMULPS", P_, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULPD    = new VexRVMOp("VMULPD", P_66, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSS    = new VexRVMOp("VMULSS", P_F3, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSD    = new VexRVMOp("VMULSD", P_F2, M_0F, WIG, 0x59);
        public static final VexRVMOp VSUBPS    = new VexRVMOp("VSUBPS", P_, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBPD    = new VexRVMOp("VSUBPD", P_66, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSS    = new VexRVMOp("VSUBSS", P_F3, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSD    = new VexRVMOp("VSUBSD", P_F2, M_0F, WIG, 0x5C);
        public static final VexRVMOp VMINPS    = new VexRVMOp("VMINPS", P_, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINPD    = new VexRVMOp("VMINPD", P_66, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSS    = new VexRVMOp("VMINSS", P_F3, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSD    = new VexRVMOp("VMINSD", P_F2, M_0F, WIG, 0x5D);
        public static final VexRVMOp VDIVPS    = new VexRVMOp("VDIVPS", P_, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVPD    = new VexRVMOp("VDIVPD", P_66, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSS    = new VexRVMOp("VDIVSS", P_F3, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSD    = new VexRVMOp("VDIVSD", P_F2, M_0F, WIG, 0x5E);
        public static final VexRVMOp VMAXPS    = new VexRVMOp("VMAXPS", P_, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXPD    = new VexRVMOp("VMAXPD", P_66, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSS    = new VexRVMOp("VMAXSS", P_F3, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSD    = new VexRVMOp("VMAXSD", P_F2, M_0F, WIG, 0x5F);
        public static final VexRVMOp VADDSUBPS = new VexRVMOp("VADDSUBPS", P_F2, M_0F, WIG, 0xD0);
        public static final VexRVMOp VADDSUBPD = new VexRVMOp("VADDSUBPD", P_66, M_0F, WIG, 0xD0);
        public static final VexRVMOp VPAND     = new VexRVMOp("VPAND", P_66, M_0F, WIG, 0xDB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPOR      = new VexRVMOp("VPOR", P_66, M_0F, WIG, 0xEB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPXOR     = new VexRVMOp("VPXOR", P_66, M_0F, WIG, 0xEF, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDB    = new VexRVMOp("VPADDB", P_66, M_0F, WIG, 0xFC, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDW    = new VexRVMOp("VPADDW", P_66, M_0F, WIG, 0xFD, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDD    = new VexRVMOp("VPADDD", P_66, M_0F, WIG, 0xFE, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDQ    = new VexRVMOp("VPADDQ", P_66, M_0F, WIG, 0xD4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHUW  = new VexRVMOp("VPMULHUW", P_66, M_0F, WIG, 0xE4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHW   = new VexRVMOp("VPMULHW", P_66, M_0F, WIG, 0xE5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLW   = new VexRVMOp("VPMULLW", P_66, M_0F, WIG, 0xD5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLD   = new VexRVMOp("VPMULLD", P_66, M_0F38, WIG, 0x40, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBB    = new VexRVMOp("VPSUBB", P_66, M_0F, WIG, 0xF8, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBW    = new VexRVMOp("VPSUBW", P_66, M_0F, WIG, 0xF9, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBD    = new VexRVMOp("VPSUBD", P_66, M_0F, WIG, 0xFA, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBQ    = new VexRVMOp("VPSUBQ", P_66, M_0F, WIG, 0xFB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSHUFB   = new VexRVMOp("VPSHUFB", P_66, M_0F38, WIG, 0x00, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VCVTSD2SS = new VexRVMOp("VCVTSD2SS", P_F2, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSS2SD = new VexRVMOp("VCVTSS2SD", P_F3, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSI2SD = new VexRVMOp("VCVTSI2SD", P_F2, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SD = new VexRVMOp("VCVTSQ2SD", P_F2, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSI2SS = new VexRVMOp("VCVTSI2SS", P_F3, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SS = new VexRVMOp("VCVTSQ2SS", P_F3, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
VEXOpAssertion.XMM_XMM_CPU); 1304 public static final VexRVMOp VPCMPEQB = new VexRVMOp("VPCMPEQB", P_66, M_0F, WIG, 0x74, VEXOpAssertion.AVX1_2); 1305 public static final VexRVMOp VPCMPEQW = new VexRVMOp("VPCMPEQW", P_66, M_0F, WIG, 0x75, VEXOpAssertion.AVX1_2); 1306 public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2); 1307 public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2); 1308 public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2); 1309 public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2); 1310 public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2); 1311 public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2); 1312 // @formatter:on 1313 1314 private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) { 1315 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1316 } 1317 1318 protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1319 super(opcode, pp, mmmmm, w, op, assertion); 1320 } 1321 1322 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1323 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1324 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1325 asm.emitByte(op); 1326 asm.emitModRM(dst, src2); 1327 } 1328 1329 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1330 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1331 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1332 asm.emitByte(op); 1333 asm.emitOperandHelper(dst, src2, 0); 1334 } 1335 } 1336 1337 public static final class VexGeneralPurposeRVMOp extends VexRVMOp { 1338 // @formatter:off 1339 public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1); 1340 public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2); 1341 public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1342 public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1343 // @formatter:on 1344 1345 private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1346 super(opcode, pp, mmmmm, w, op, assertion); 1347 } 1348 1349 @Override 1350 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1351 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null); 1352 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1353 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1354 asm.emitByte(op); 1355 asm.emitModRM(dst, src2); 1356 } 1357 1358 @Override 1359 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1360 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null); 1361 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1362 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? 
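    // Usage sketch (operands illustrative): RVM encodes "dst := src1 op src2" with src1
    // carried in VEX.vvvv, so a three-operand add needs no extra moves:
    //
    //   VexRVMOp.VADDPD.emit(asm, AVXSize.YMM, xmm0, xmm1, xmm2); // vaddpd ymm0, ymm1, ymm2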
    public static final class VexGeneralPurposeRVMOp extends VexRVMOp {
        // @formatter:off
        public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        // @formatter:on

        private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
        }
    }

    public static final class VexGeneralPurposeRMVOp extends VexOp {
        // @formatter:off
        public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
        // @formatter:on

        private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src1);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src1, 0);
        }
    }

    public static final class VexGeneralPurposeRMOp extends VexRMOp {
        // @formatter:off
        public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1);
        // @formatter:on
        private final int ext;

        private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.ext = ext;
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitModRM(ext, src);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitOperandHelper(ext, src, 0);
        }
    }
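    // Usage sketches for the BMI helpers above (registers illustrative). The W bit is
    // derived from the requested size, so the same op serves 32- and 64-bit operands:
    //
    //   VexGeneralPurposeRVMOp.ANDN.emit(asm, AVXSize.QWORD, rax, rbx, rcx); // rax = ~rbx & rcx
    //   VexGeneralPurposeRMVOp.SHRX.emit(asm, AVXSize.QWORD, rax, rbx, rcx); // rax = rbx >>> (rcx & 63)
    //   VexGeneralPurposeRMOp.BLSR.emit(asm, AVXSize.QWORD, rax, rbx);       // rax = rbx & (rbx - 1)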
    /**
     * VEX-encoded shift instructions with an operand order of either RVM or VMI.
     */
    public static final class VexShiftOp extends VexRVMOp implements VexRRIOp {
        // @formatter:off
        public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2);
        public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2);
        public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2);
        public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4);
        public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4);
        public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6);
        public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6);
        public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6);
        // @formatter:on

        private final int immOp;
        private final int r;

        private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) {
            super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2);
            this.immOp = immOp;
            this.r = r;
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, null, dst, src);
            asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false);
            asm.emitByte(immOp);
            asm.emitModRM(r, src);
            asm.emitByte(imm8);
        }
    }

    public static final class VexMaskMoveOp extends VexOp {
        // @formatter:off
        public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E);
        public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F);
        public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2);
        public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2);
        // @formatter:on

        private final int opReverse;

        private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null);
            asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, mask, null);
            asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }
    }
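    // Sketches for the two classes above (operands illustrative). A VexShiftOp can shift
    // by an immediate (VMI form) or by the low quadword of an XMM register (inherited RVM
    // form); a VexMaskMoveOp picks op or opReverse depending on the direction of the move:
    //
    //   VexShiftOp.VPSRLD.emit(asm, AVXSize.XMM, xmm0, xmm1, 5);           // vpsrld xmm0, xmm1, 5
    //   VexMaskMoveOp.VMASKMOVPS.emit(asm, AVXSize.YMM, xmm0, xmm1, addr); // masked load
    //   VexMaskMoveOp.VMASKMOVPS.emit(asm, AVXSize.YMM, addr, xmm1, xmm0); // masked store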
    /**
     * VEX-encoded instructions with an operand order of RVMI.
     */
    public static final class VexRVMIOp extends VexOp {
        // @formatter:off
        public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6);
        public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6);
        public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY);
        // @formatter:on

        private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            assert (imm8 & 0xFF) == imm8;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            assert (imm8 & 0xFF) == imm8;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 1);
            asm.emitByte(imm8);
        }
    }
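    // Usage sketch (operands illustrative): for VINSERTF128 the trailing imm8 selects the
    // destination 128-bit lane, e.g. 1 for the upper half of a 256-bit register:
    //
    //   VexRVMIOp.VINSERTF128.emit(asm, AVXSize.YMM, xmm0, xmm1, xmm2, 1);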
    /**
     * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a
     * comparison operator.
     */
    public static final class VexFloatCompareOp extends VexOp {
        // @formatter:off
        public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2);
        public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
        public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2);
        public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
        // @formatter:on

        public enum Predicate {
            EQ_OQ(0x00),
            LT_OS(0x01),
            LE_OS(0x02),
            UNORD_Q(0x03),
            NEQ_UQ(0x04),
            NLT_US(0x05),
            NLE_US(0x06),
            ORD_Q(0x07),
            EQ_UQ(0x08),
            NGE_US(0x09),
            NGT_US(0x0a),
            FALSE_OQ(0x0b),
            NEQ_OQ(0x0c),
            GE_OS(0x0d),
            GT_OS(0x0e),
            TRUE_UQ(0x0f),
            EQ_OS(0x10),
            LT_OQ(0x11),
            LE_OQ(0x12),
            UNORD_S(0x13),
            NEQ_US(0x14),
            NLT_UQ(0x15),
            NLE_UQ(0x16),
            ORD_S(0x17),
            EQ_US(0x18),
            NGE_UQ(0x19),
            NGT_UQ(0x1a),
            FALSE_OS(0x1b),
            NEQ_OS(0x1c),
            GE_OQ(0x1d),
            GT_OQ(0x1e),
            TRUE_US(0x1f);

            private int imm8;

            Predicate(int imm8) {
                this.imm8 = imm8;
            }

            public static Predicate getPredicate(Condition condition, boolean unorderedIsTrue) {
                if (unorderedIsTrue) {
                    switch (condition) {
                        case EQ:
                            return EQ_UQ;
                        case NE:
                            return NEQ_UQ;
                        case LT:
                            return NGE_UQ;
                        case LE:
                            return NGT_UQ;
                        case GT:
                            return NLE_UQ;
                        case GE:
                            return NLT_UQ;
                        default:
                            throw GraalError.shouldNotReachHere();
                    }
                } else {
                    switch (condition) {
                        case EQ:
                            return EQ_OQ;
                        case NE:
                            return NEQ_OQ;
                        case LT:
                            return LT_OQ;
                        case LE:
                            return LE_OQ;
                        case GT:
                            return GT_OQ;
                        case GE:
                            return GE_OQ;
                        default:
                            throw GraalError.shouldNotReachHere();
                    }
                }
            }
        }

        private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
            super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(p.imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 1);
            asm.emitByte(p.imm8);
        }
    }
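    // Usage sketch (operands illustrative): the Predicate mapping above turns a Graal
    // Condition plus the desired unordered semantics into the imm8 comparison operator:
    //
    //   VexFloatCompareOp.Predicate p = VexFloatCompareOp.Predicate.getPredicate(Condition.LT, false); // LT_OQ
    //   VexFloatCompareOp.VCMPPD.emit(asm, AVXSize.YMM, xmm0, xmm1, xmm2, p);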
    public final void addl(AMD64Address dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, Register src) {
        ADD.rmOp.emit(this, DWORD, dst, src);
    }

    public final void addpd(Register dst, Register src) {
        SSEOp.ADD.emit(this, PD, dst, src);
    }

    public final void addpd(Register dst, AMD64Address src) {
        SSEOp.ADD.emit(this, PD, dst, src);
    }

    public final void addsd(Register dst, Register src) {
        SSEOp.ADD.emit(this, SD, dst, src);
    }

    public final void addsd(Register dst, AMD64Address src) {
        SSEOp.ADD.emit(this, SD, dst, src);
    }

    private void addrNop4() {
        // 4 bytes: NOP DWORD PTR [EAX+0]
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
        emitByte(0); // 8-bit offset (1 byte)
    }

    private void addrNop5() {
        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitByte(0); // 8-bit offset (1 byte)
    }

    private void addrNop7() {
        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
        emitInt(0); // 32-bit offset (4 bytes)
    }

    private void addrNop8() {
        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitInt(0); // 32-bit offset (4 bytes)
    }

    public final void andl(Register dst, int imm32) {
        AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void andl(Register dst, Register src) {
        AND.rmOp.emit(this, DWORD, dst, src);
    }

    public final void andpd(Register dst, Register src) {
        SSEOp.AND.emit(this, PD, dst, src);
    }

    public final void andpd(Register dst, AMD64Address src) {
        SSEOp.AND.emit(this, PD, dst, src);
    }

    public final void bsfq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xBC);
        emitModRM(dst, src);
    }

    public final void bsrl(Register dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xBD);
        emitModRM(dst, src);
    }

    public final void bswapl(Register reg) {
        prefix(reg);
        emitByte(0x0F);
        emitModRM(1, reg);
    }

    public final void cdql() {
        emitByte(0x99);
    }

    public final void cmovl(ConditionFlag cc, Register dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitModRM(dst, src);
    }

    public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitOperandHelper(dst, src, 0);
    }

    public final void cmpb(Register dst, Register src) {
        CMP.byteRmOp.emit(this, BYTE, dst, src);
    }

    public final void cmpw(Register dst, Register src) {
        CMP.rmOp.emit(this, WORD, dst, src);
    }

    public final void cmpl(Register dst, int imm32) {
        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void cmpl(Register dst, Register src) {
        CMP.rmOp.emit(this, DWORD, dst, src);
    }

    public final void cmpl(Register dst, AMD64Address src) {
        CMP.rmOp.emit(this, DWORD, dst, src);
    }

    public final void cmpl(AMD64Address dst, int imm32) {
        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }
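    // Note on the ALU helpers above: getMIOpcode(DWORD, isByte(imm32)) selects between the
    // sign-extended imm8 form (opcode 0x83) and the full imm32 form (opcode 0x81), so e.g.
    // addl(rax, 16) costs 3 bytes instead of 6 (register choice illustrative).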
    /**
     * The 8-bit cmpxchg compares the value at adr with the contents of X86.rax, and stores reg
     * into adr if the compared values are equal; otherwise, the value at adr is loaded into
     * X86.rax. The ZF is set if the compared values were equal, and cleared otherwise.
     */
    public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg
        prefixb(adr, reg);
        emitByte(0x0F);
        emitByte(0xB0);
        emitOperandHelper(reg, adr, 0);
    }

    /**
     * The 16-bit cmpxchg compares the value at adr with the contents of X86.rax, and stores reg
     * into adr if the compared values are equal; otherwise, the value at adr is loaded into
     * X86.rax. The ZF is set if the compared values were equal, and cleared otherwise.
     */
    public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg
        emitByte(0x66); // Switch to 16-bit mode.
        prefix(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }

    /**
     * The 32-bit cmpxchg compares the value at adr with the contents of X86.rax, and stores reg
     * into adr if the compared values are equal; otherwise, the value at adr is loaded into
     * X86.rax. The ZF is set if the compared values were equal, and cleared otherwise.
     */
    public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
        prefix(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }
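    // A compare-and-swap is built from lock() plus one of the cmpxchg forms above
    // (sketch; register and address operands are hypothetical):
    //
    //   asm.lock();
    //   asm.cmpxchgl(newValue, addr); // ZF=1 and [addr]=newValue on success;
    //                                 // on failure rax receives the current value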
    public final void cvtsi2sdl(Register dst, Register src) {
        SSEOp.CVTSI2SD.emit(this, DWORD, dst, src);
    }

    public final void cvttsd2sil(Register dst, Register src) {
        SSEOp.CVTTSD2SI.emit(this, DWORD, dst, src);
    }

    public final void decl(AMD64Address dst) {
        prefix(dst);
        emitByte(0xFF);
        emitOperandHelper(1, dst, 0);
    }

    public final void divsd(Register dst, Register src) {
        SSEOp.DIV.emit(this, SD, dst, src);
    }

    public final void hlt() {
        emitByte(0xF4);
    }

    public final void imull(Register dst, Register src, int value) {
        if (isByte(value)) {
            AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
        } else {
            AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
        }
    }

    public final void incl(AMD64Address dst) {
        prefix(dst);
        emitByte(0xFF);
        emitOperandHelper(0, dst, 0);
    }

    public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
        int shortSize = 2;
        int longSize = 6;
        long disp = jumpTarget - position();
        if (!forceDisp32 && isByte(disp - shortSize)) {
            // 0111 tttn #8-bit disp
            emitByte(0x70 | cc.getValue());
            emitByte((int) ((disp - shortSize) & 0xFF));
        } else {
            // 0000 1111 1000 tttn #32-bit disp
            assert isInt(disp - longSize) : "must be 32bit offset (call4)";
            emitByte(0x0F);
            emitByte(0x80 | cc.getValue());
            emitInt((int) (disp - longSize));
        }
    }

    public final void jcc(ConditionFlag cc, Label l) {
        assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
        if (l.isBound()) {
            jcc(cc, l.position(), false);
        } else {
            // Note: we could eliminate conditional jumps to this jump if the condition is
            // the same; however, that seems to be a rather unlikely case.
            // Note: use jccb() if the label to be bound is very close, to get an 8-bit
            // displacement.
            l.addPatchAt(position());
            emitByte(0x0F);
            emitByte(0x80 | cc.getValue());
            emitInt(0);
        }
    }

    public final void jccb(ConditionFlag cc, Label l) {
        if (l.isBound()) {
            int shortSize = 2;
            int entry = l.position();
            assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
            long disp = entry - position();
            // 0111 tttn #8-bit disp
            emitByte(0x70 | cc.getValue());
            emitByte((int) ((disp - shortSize) & 0xFF));
        } else {
            l.addPatchAt(position());
            emitByte(0x70 | cc.getValue());
            emitByte(0);
        }
    }

    public final void jmp(int jumpTarget, boolean forceDisp32) {
        int shortSize = 2;
        int longSize = 5;
        long disp = jumpTarget - position();
        if (!forceDisp32 && isByte(disp - shortSize)) {
            emitByte(0xEB);
            emitByte((int) ((disp - shortSize) & 0xFF));
        } else {
            emitByte(0xE9);
            emitInt((int) (disp - longSize));
        }
    }

    @Override
    public final void jmp(Label l) {
        if (l.isBound()) {
            jmp(l.position(), false);
        } else {
            // By default, forward jumps are always 32-bit displacements, since we can't yet
            // know where the label will be bound. If you're sure that the forward jump will
            // not run beyond 256 bytes, use jmpb to force an 8-bit displacement.
            l.addPatchAt(position());
            emitByte(0xE9);
            emitInt(0);
        }
    }

    public final void jmp(Register entry) {
        prefix(entry);
        emitByte(0xFF);
        emitModRM(4, entry);
    }

    public final void jmp(AMD64Address adr) {
        prefix(adr);
        emitByte(0xFF);
        emitOperandHelper(AMD64.rsp, adr, 0);
    }

    public final void jmpb(Label l) {
        if (l.isBound()) {
            int shortSize = 2;
            int entry = l.position();
            assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
            long offs = entry - position();
            emitByte(0xEB);
            emitByte((int) ((offs - shortSize) & 0xFF));
        } else {
            l.addPatchAt(position());
            emitByte(0xEB);
            emitByte(0);
        }
    }
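    // Worked example of the displacement arithmetic above (positions hypothetical): a
    // bound short jcc emitted at position 100 targeting position 64 encodes
    // disp8 = 64 - (100 + 2) = -38; a target outside the [-128, 127] byte range falls
    // back to the 6-byte 0F 8x form with a 32-bit displacement.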
    public final void lead(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x8D);
        emitOperandHelper(dst, src, 0);
    }

    public final void leaq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x8D);
        emitOperandHelper(dst, src, 0);
    }

    public final void leave() {
        emitByte(0xC9);
    }

    public final void lock() {
        emitByte(0xF0);
    }

    public final void movapd(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0x28);
        emitModRM(dst, src);
    }

    public final void movaps(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PS, P_0F, false);
        emitByte(0x28);
        emitModRM(dst, src);
    }

    public final void movb(AMD64Address dst, int imm8) {
        prefix(dst);
        emitByte(0xC6);
        emitOperandHelper(0, dst, 1);
        emitByte(imm8);
    }

    public final void movb(AMD64Address dst, Register src) {
        assert inRC(CPU, src) : "must have byte register";
        prefixb(dst, src);
        emitByte(0x88);
        emitOperandHelper(src, dst, 0);
    }

    public final void movl(Register dst, int imm32) {
        movl(dst, imm32, false);
    }

    public final void movl(Register dst, int imm32, boolean annotateImm) {
        int insnPos = position();
        prefix(dst);
        emitByte(0xB8 + encode(dst));
        int immPos = position();
        emitInt(imm32);
        int nextInsnPos = position();
        if (annotateImm && codePatchingAnnotationConsumer != null) {
            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
        }
    }

    public final void movl(Register dst, Register src) {
        prefix(dst, src);
        emitByte(0x8B);
        emitModRM(dst, src);
    }

    public final void movl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x8B);
        emitOperandHelper(dst, src, 0);
    }

    /**
     * @param wide use 4 byte encoding for displacements that would normally fit in a byte
     */
    public final void movl(Register dst, AMD64Address src, boolean wide) {
        prefix(src, dst);
        emitByte(0x8B);
        emitOperandHelper(dst, src, wide, 0);
    }

    public final void movl(AMD64Address dst, int imm32) {
        prefix(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x89);
        emitOperandHelper(src, dst, 0);
    }

    /**
     * New CPUs require the use of movsd and movss to avoid partial register stalls when loading
     * from memory. But for old Opteron, use movlpd instead of movsd. The selection is done in
     * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
     * {@link AMD64MacroAssembler#movflt(Register, Register)}.
     */
    public final void movlpd(Register dst, AMD64Address src) {
        assert inRC(XMM, dst);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x12);
        emitOperandHelper(dst, src, 0);
    }

    public final void movlhps(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, src, src, PS, P_0F, false);
        emitByte(0x16);
        emitModRM(dst, src);
    }
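    // The movq overloads below dispatch on the register category: an XMM operand selects
    // the F3 0F 7E / 66 0F D6 vector forms, a general-purpose register the REX.W 8B/89
    // forms. Sketch (operands illustrative):
    //
    //   asm.movq(xmm0, addr); // movq xmm0, qword ptr [addr]
    //   asm.movq(rax, addr);  // movq rax, qword ptr [addr]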
    public final void movq(Register dst, AMD64Address src) {
        movq(dst, src, false);
    }

    public final void movq(Register dst, AMD64Address src, boolean force4BytesDisplacement) {
        if (inRC(XMM, dst)) {
            // Insn: MOVQ xmm, r/m64
            // Code: F3 0F 7E /r
            // An alternative instruction would be 66 REX.W 0F 6E /r. We prefer the REX.W-free
            // format, because it allows us to use the two-byte VEX prefix when applicable.
            simdPrefix(dst, Register.None, src, SS, P_0F, false);
            emitByte(0x7E);
            emitOperandHelper(dst, src, force4BytesDisplacement, 0);
        } else {
            // gpr version of movq
            prefixq(src, dst);
            emitByte(0x8B);
            emitOperandHelper(dst, src, force4BytesDisplacement, 0);
        }
    }

    public final void movq(Register dst, Register src) {
        assert inRC(CPU, dst) && inRC(CPU, src);
        prefixq(dst, src);
        emitByte(0x8B);
        emitModRM(dst, src);
    }

    public final void movq(AMD64Address dst, Register src) {
        if (inRC(XMM, src)) {
            // Insn: MOVQ r/m64, xmm
            // Code: 66 0F D6 /r
            // An alternative instruction would be 66 REX.W 0F 7E /r. We prefer the REX.W-free
            // format, because it allows us to use the two-byte VEX prefix when applicable.
            simdPrefix(src, Register.None, dst, PD, P_0F, false);
            emitByte(0xD6);
            emitOperandHelper(src, dst, 0);
        } else {
            // gpr version of movq
            prefixq(dst, src);
            emitByte(0x89);
            emitOperandHelper(src, dst, 0);
        }
    }

    public final void movsbl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xBE);
        emitOperandHelper(dst, src, 0);
    }

    public final void movsbl(Register dst, Register src) {
        prefix(dst, false, src, true);
        emitByte(0x0F);
        emitByte(0xBE);
        emitModRM(dst, src);
    }

    public final void movsbq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x0F);
        emitByte(0xBE);
        emitOperandHelper(dst, src, 0);
    }

    public final void movsbq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xBE);
        emitModRM(dst, src);
    }

    public final void movsd(Register dst, Register src) {
        AMD64RMOp.MOVSD.emit(this, SD, dst, src);
    }

    public final void movsd(Register dst, AMD64Address src) {
        AMD64RMOp.MOVSD.emit(this, SD, dst, src);
    }

    public final void movsd(AMD64Address dst, Register src) {
        AMD64MROp.MOVSD.emit(this, SD, dst, src);
    }

    public final void movss(Register dst, Register src) {
        AMD64RMOp.MOVSS.emit(this, SS, dst, src);
    }

    public final void movss(Register dst, AMD64Address src) {
        AMD64RMOp.MOVSS.emit(this, SS, dst, src);
    }

    public final void movss(AMD64Address dst, Register src) {
        AMD64MROp.MOVSS.emit(this, SS, dst, src);
    }

    public final void mulpd(Register dst, Register src) {
        SSEOp.MUL.emit(this, PD, dst, src);
    }

    public final void mulpd(Register dst, AMD64Address src) {
        SSEOp.MUL.emit(this, PD, dst, src);
    }

    public final void mulsd(Register dst, Register src) {
        SSEOp.MUL.emit(this, SD, dst, src);
    }

    public final void mulsd(Register dst, AMD64Address src) {
        SSEOp.MUL.emit(this, SD, dst, src);
    }

    public final void mulss(Register dst, Register src) {
        SSEOp.MUL.emit(this, SS, dst, src);
    }

    public final void movswl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xBF);
        emitOperandHelper(dst, src, 0);
    }

    public final void movw(AMD64Address dst, int imm16) {
        emitByte(0x66); // switch to 16-bit mode
        prefix(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 2);
        emitShort(imm16);
    }

    public final void movw(AMD64Address dst, Register src) {
        emitByte(0x66);
        prefix(dst, src);
        emitByte(0x89);
        emitOperandHelper(src, dst, 0);
    }

    public final void movzbl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xB6);
        emitOperandHelper(dst, src, 0);
    }

    public final void movzbl(Register dst, Register src) {
        AMD64RMOp.MOVZXB.emit(this, DWORD, dst, src);
    }

    public final void movzbq(Register dst, Register src) {
        AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src);
    }

    public final void movzwl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xB7);
        emitOperandHelper(dst, src, 0);
    }

    public final void negl(Register dst) {
        NEG.emit(this, DWORD, dst);
    }
    public final void notl(Register dst) {
        NOT.emit(this, DWORD, dst);
    }

    public final void notq(Register dst) {
        NOT.emit(this, QWORD, dst);
    }

    @Override
    public final void ensureUniquePC() {
        nop();
    }

    public final void nop() {
        nop(1);
    }

    public void nop(int count) {
        int i = count;
        if (UseNormalNop) {
            assert i > 0 : " ";
            // The fancy nops aren't currently recognized by debuggers, which makes it a pain
            // to disassemble code while debugging. If asserts are on, speed is clearly not an
            // issue, so simply use the traditional single-byte nop for alignment.
            for (; i > 0; i--) {
                emitByte(0x90);
            }
            return;
        }

        if (UseAddressNop) {
            //
            // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
            // 1: 0x90
            // 2: 0x66 0x90
            // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
            // 4: 0x0F 0x1F 0x40 0x00
            // 5: 0x0F 0x1F 0x44 0x00 0x00
            // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
            // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
            // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
            // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
            // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
            // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

            // The rest of the encoding is AMD-specific: use consecutive address nops.

            // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
            // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
            // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
            // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
            // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
            // Size prefixes (0x66) are added for larger sizes.

            while (i >= 22) {
                i -= 11;
                emitByte(0x66); // size prefix
                emitByte(0x66); // size prefix
                emitByte(0x66); // size prefix
                addrNop8();
            }
            // Generate the first nop for sizes between 21 and 12.
            switch (i) {
                case 21:
                    i -= 11;
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 20:
                case 19:
                    i -= 10;
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 18:
                case 17:
                    i -= 9;
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 16:
                case 15:
                    i -= 8;
                    addrNop8();
                    break;
                case 14:
                case 13:
                    i -= 7;
                    addrNop7();
                    break;
                case 12:
                    i -= 6;
                    emitByte(0x66); // size prefix
                    addrNop5();
                    break;
                default:
                    assert i < 12;
            }

            // Generate the second nop for sizes between 11 and 1.
            switch (i) {
                case 11:
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 10:
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 9:
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 8:
                    addrNop8();
                    break;
                case 7:
                    addrNop7();
                    break;
                case 6:
                    emitByte(0x66); // size prefix
                    addrNop5();
                    break;
                case 5:
                    addrNop5();
                    break;
                case 4:
                    addrNop4();
                    break;
                case 3:
                    // Don't use "0x0F 0x1F 0x00" - need patching safe padding
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    emitByte(0x90); // nop
                    break;
                case 2:
                    emitByte(0x66); // size prefix
                    emitByte(0x90); // nop
                    break;
                case 1:
                    emitByte(0x90); // nop
                    break;
                default:
                    assert i == 0;
            }
            return;
        }

        // Using nops with size prefixes "0x66 0x90".
        // From the AMD Optimization Guide:
        // 1: 0x90
        // 2: 0x66 0x90
        // 3: 0x66 0x66 0x90
        // 4: 0x66 0x66 0x66 0x90
        // 5: 0x66 0x66 0x90 0x66 0x90
        // 6: 0x66 0x66 0x90 0x66 0x66 0x90
        // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
        // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
        // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
        // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
        //
        while (i > 12) {
            i -= 4;
            emitByte(0x66); // size prefix
            emitByte(0x66);
            emitByte(0x66);
            emitByte(0x90); // nop
        }
        // 1 - 12 nops
        if (i > 8) {
            if (i > 9) {
                i -= 1;
                emitByte(0x66);
            }
            i -= 3;
            emitByte(0x66);
            emitByte(0x66);
            emitByte(0x90);
        }
        // 1 - 8 nops
        if (i > 4) {
            if (i > 6) {
                i -= 1;
                emitByte(0x66);
            }
            i -= 3;
            emitByte(0x66);
            emitByte(0x66);
            emitByte(0x90);
        }
        switch (i) {
            case 4:
                emitByte(0x66);
                emitByte(0x66);
                emitByte(0x66);
                emitByte(0x90);
                break;
            case 3:
                emitByte(0x66);
                emitByte(0x66);
                emitByte(0x90);
                break;
            case 2:
                emitByte(0x66);
                emitByte(0x90);
                break;
            case 1:
                emitByte(0x90);
                break;
            default:
                assert i == 0;
        }
    }

    public final void orl(Register dst, Register src) {
        OR.rmOp.emit(this, DWORD, dst, src);
    }

    public final void orl(Register dst, int imm32) {
        OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    // Insn: VPACKUSWB xmm1, xmm2, xmm3/m128
    // -----
    // Insn: VPACKUSWB xmm1, xmm1, xmm2

    public final void packuswb(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        // Code: VEX.NDS.128.66.0F.WIG 67 /r
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x67);
        emitModRM(dst, src);
    }

    public final void pop(Register dst) {
        prefix(dst);
        emitByte(0x58 + encode(dst));
    }

    public void popfq() {
        emitByte(0x9D);
    }

    public final void ptest(Register dst, Register src) {
        assert supports(CPUFeature.SSE4_1);
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PD, P_0F38, false);
        emitByte(0x17);
        emitModRM(dst, src);
    }

    public final void pcmpeqb(Register dst, Register src) {
        assert supports(CPUFeature.SSE2);
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x74);
        emitModRM(dst, src);
    }

    public final void pcmpeqw(Register dst, Register src) {
        assert supports(CPUFeature.SSE2);
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x75);
        emitModRM(dst, src);
    }

    public final void pcmpeqd(Register dst, Register src) {
        assert supports(CPUFeature.SSE2);
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x76);
        emitModRM(dst, src);
    }
    public final void pcmpestri(Register dst, AMD64Address src, int imm8) {
        assert supports(CPUFeature.SSE4_2);
        assert inRC(XMM, dst);
        simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
        emitByte(0x61);
        emitOperandHelper(dst, src, 0);
        emitByte(imm8);
    }

    public final void pcmpestri(Register dst, Register src, int imm8) {
        assert supports(CPUFeature.SSE4_2);
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
        emitByte(0x61);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void pmovmskb(Register dst, Register src) {
        assert supports(CPUFeature.SSE2);
        assert inRC(CPU, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0xD7);
        emitModRM(dst, src);
    }

    // Insn: VPMOVZXBW xmm1, xmm2/m64

    public final void pmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.SSE4_1);
        assert inRC(XMM, dst);
        simdPrefix(dst, Register.None, src, PD, P_0F38, false);
        emitByte(0x30);
        emitOperandHelper(dst, src, 0);
    }

    public final void pmovzxbw(Register dst, Register src) {
        assert supports(CPUFeature.SSE4_1);
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PD, P_0F38, false);
        emitByte(0x30);
        emitModRM(dst, src);
    }

    public final void push(Register src) {
        prefix(src);
        emitByte(0x50 + encode(src));
    }

    public void pushfq() {
        emitByte(0x9c);
    }

    public final void paddd(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xFE);
        emitModRM(dst, src);
    }

    public final void paddq(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xD4);
        emitModRM(dst, src);
    }

    public final void pextrw(Register dst, Register src, int imm8) {
        assert inRC(CPU, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0xC5);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void pinsrw(Register dst, Register src, int imm8) {
        assert inRC(XMM, dst) && inRC(CPU, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xC4);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void por(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xEB);
        emitModRM(dst, src);
    }

    public final void pand(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xDB);
        emitModRM(dst, src);
    }

    public final void pxor(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xEF);
        emitModRM(dst, src);
    }

    public final void pslld(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert inRC(XMM, dst);
        // XMM6 is for /6 encoding: 66 0F 72 /6 ib
        simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
        emitByte(0x72);
        emitModRM(6, dst);
        emitByte(imm8 & 0xFF);
    }
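    // pcmpeqb and pmovmskb (both above) combine into the usual SSE byte-scan idiom
    // (sketch, registers illustrative):
    //
    //   asm.pcmpeqb(xmm0, xmm1);  // 0xFF in each byte lane where xmm0 == xmm1
    //   asm.pmovmskb(rax, xmm0);  // gather the 16 byte sign bits into rax
    //   // bsf/tzcnt on rax then yields the index of the first matching byte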
    public final void psllq(Register dst, Register shift) {
        assert inRC(XMM, dst) && inRC(XMM, shift);
        simdPrefix(dst, dst, shift, PD, P_0F, false);
        emitByte(0xF3);
        emitModRM(dst, shift);
    }

    public final void psllq(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert inRC(XMM, dst);
        // XMM6 is for /6 encoding: 66 0F 73 /6 ib
        simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
        emitByte(0x73);
        emitModRM(6, dst);
        emitByte(imm8);
    }

    public final void psrad(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert inRC(XMM, dst);
        // XMM4 is for /4 encoding: 66 0F 72 /4 ib
        simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false);
        emitByte(0x72);
        emitModRM(4, dst);
        emitByte(imm8);
    }

    public final void psrld(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert inRC(XMM, dst);
        // XMM2 is for /2 encoding: 66 0F 72 /2 ib
        simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
        emitByte(0x72);
        emitModRM(2, dst);
        emitByte(imm8);
    }

    public final void psrlq(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert inRC(XMM, dst);
        // XMM2 is for /2 encoding: 66 0F 73 /2 ib
        simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
        emitByte(0x73);
        emitModRM(2, dst);
        emitByte(imm8);
    }

    public final void psrldq(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert inRC(XMM, dst);
        simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false);
        emitByte(0x73);
        emitModRM(3, dst);
        emitByte(imm8);
    }

    public final void pshufb(Register dst, Register src) {
        assert supports(CPUFeature.SSSE3);
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F38, false);
        emitByte(0x00);
        emitModRM(dst, src);
    }

    public final void pshuflw(Register dst, Register src, int imm8) {
        assert supports(CPUFeature.SSE2);
        assert isUByte(imm8) : "invalid value";
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, SD, P_0F, false);
        emitByte(0x70);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void pshufd(Register dst, Register src, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0x70);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void psubd(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xFA);
        emitModRM(dst, src);
    }

    public final void punpcklbw(Register dst, Register src) {
        assert supports(CPUFeature.SSE2);
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x60);
        emitModRM(dst, src);
    }

    public final void rcpps(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PS, P_0F, false);
        emitByte(0x53);
        emitModRM(dst, src);
    }

    public final void ret(int imm16) {
        if (imm16 == 0) {
            emitByte(0xC3);
        } else {
            emitByte(0xC2);
            emitShort(imm16);
        }
    }
    public final void sarl(Register dst, int imm8) {
        prefix(dst);
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(7, dst);
        } else {
            emitByte(0xC1);
            emitModRM(7, dst);
            emitByte(imm8);
        }
    }

    public final void shll(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefix(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(4, dst);
        } else {
            emitByte(0xC1);
            emitModRM(4, dst);
            emitByte(imm8);
        }
    }

    public final void shll(Register dst) {
        // Multiply dst by 2, CL times.
        prefix(dst);
        emitByte(0xD3);
        emitModRM(4, dst);
    }

    // Insn: SHLX r32a, r/m32, r32b

    public final void shlxl(Register dst, Register src1, Register src2) {
        VexGeneralPurposeRMVOp.SHLX.emit(this, AVXSize.DWORD, dst, src1, src2);
    }

    public final void shrl(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefix(dst);
        emitByte(0xC1);
        emitModRM(5, dst);
        emitByte(imm8);
    }

    public final void shrl(Register dst) {
        // Unsigned divide dst by 2, CL times.
        prefix(dst);
        emitByte(0xD3);
        emitModRM(5, dst);
    }

    public final void subl(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void subl(Register dst, int imm32) {
        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void subl(Register dst, Register src) {
        SUB.rmOp.emit(this, DWORD, dst, src);
    }

    public final void subpd(Register dst, Register src) {
        SSEOp.SUB.emit(this, PD, dst, src);
    }

    public final void subsd(Register dst, Register src) {
        SSEOp.SUB.emit(this, SD, dst, src);
    }

    public final void subsd(Register dst, AMD64Address src) {
        SSEOp.SUB.emit(this, SD, dst, src);
    }

    public final void testl(Register dst, int imm32) {
        // not using emitArith because test does not support
        // sign-extension of 8-bit operands
        if (dst.encoding == 0) {
            emitByte(0xA9);
        } else {
            prefix(dst);
            emitByte(0xF7);
            emitModRM(0, dst);
        }
        emitInt(imm32);
    }

    public final void testl(Register dst, Register src) {
        prefix(dst, src);
        emitByte(0x85);
        emitModRM(dst, src);
    }

    public final void testl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x85);
        emitOperandHelper(dst, src, 0);
    }

    public final void unpckhpd(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x15);
        emitModRM(dst, src);
    }

    public final void unpcklpd(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x14);
        emitModRM(dst, src);
    }

    public final void xorl(Register dst, Register src) {
        XOR.rmOp.emit(this, DWORD, dst, src);
    }

    public final void xorpd(Register dst, Register src) {
        SSEOp.XOR.emit(this, PD, dst, src);
    }

    public final void xorps(Register dst, Register src) {
        SSEOp.XOR.emit(this, PS, dst, src);
    }

    protected final void decl(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        prefix(dst);
        emitByte(0xFF);
        emitModRM(1, dst);
    }
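    // Note on shlxl above: the BMI2 shift takes its count from an arbitrary register
    // rather than CL and leaves the flags untouched (sketch, registers illustrative):
    //
    //   asm.shlxl(rax, rbx, rcx); // rax = rbx << (rcx & 31), flags unmodified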
    protected final void incl(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        prefix(dst);
        emitByte(0xFF);
        emitModRM(0, dst);
    }

    public final void addq(Register dst, int imm32) {
        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void addq(AMD64Address dst, int imm32) {
        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void addq(Register dst, Register src) {
        ADD.rmOp.emit(this, QWORD, dst, src);
    }

    public final void addq(AMD64Address dst, Register src) {
        ADD.mrOp.emit(this, QWORD, dst, src);
    }

    public final void andq(Register dst, int imm32) {
        AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void bsrq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xBD);
        emitModRM(dst, src);
    }

    public final void bswapq(Register reg) {
        prefixq(reg);
        emitByte(0x0F);
        emitByte(0xC8 + encode(reg));
    }

    public final void cdqq() {
        rexw();
        emitByte(0x99);
    }

    public final void cmovq(ConditionFlag cc, Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitModRM(dst, src);
    }

    public final void setb(ConditionFlag cc, Register dst) {
        prefix(dst, true);
        emitByte(0x0F);
        emitByte(0x90 | cc.getValue());
        emitModRM(0, dst);
    }

    public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitOperandHelper(dst, src, 0);
    }

    public final void cmpq(Register dst, int imm32) {
        CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void cmpq(Register dst, Register src) {
        CMP.rmOp.emit(this, QWORD, dst, src);
    }

    public final void cmpq(Register dst, AMD64Address src) {
        CMP.rmOp.emit(this, QWORD, dst, src);
    }

    public final void cmpxchgq(Register reg, AMD64Address adr) {
        prefixq(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }

    public final void cvtdq2pd(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, SS, P_0F, false);
        emitByte(0xE6);
        emitModRM(dst, src);
    }

    public final void cvtsi2sdq(Register dst, Register src) {
        SSEOp.CVTSI2SD.emit(this, QWORD, dst, src);
    }

    public final void cvttsd2siq(Register dst, Register src) {
        SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src);
    }

    public final void cvttpd2dq(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0xE6);
        emitModRM(dst, src);
    }

    public final void decq(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        prefixq(dst);
        emitByte(0xFF);
        emitModRM(1, dst);
    }

    public final void decq(AMD64Address dst) {
        DEC.emit(this, QWORD, dst);
    }

    public final void imulq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xAF);
        emitModRM(dst, src);
    }
    public final void incq(Register dst) {
        // Don't use it directly; use the macro incrementq() instead.
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        prefixq(dst);
        emitByte(0xFF);
        emitModRM(0, dst);
    }

    public final void incq(AMD64Address dst) {
        INC.emit(this, QWORD, dst);
    }

    public final void movq(Register dst, long imm64) {
        movq(dst, imm64, false);
    }

    public final void movq(Register dst, long imm64, boolean annotateImm) {
        int insnPos = position();
        prefixq(dst);
        emitByte(0xB8 + encode(dst));
        int immPos = position();
        emitLong(imm64);
        int nextInsnPos = position();
        if (annotateImm && codePatchingAnnotationConsumer != null) {
            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
        }
    }

    public final void movslq(Register dst, int imm32) {
        prefixq(dst);
        emitByte(0xC7);
        emitModRM(0, dst);
        emitInt(imm32);
    }

    public final void movdq(Register dst, AMD64Address src) {
        AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
    }

    public final void movdq(AMD64Address dst, Register src) {
        AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
    }

    public final void movdq(Register dst, Register src) {
        if (inRC(XMM, dst) && inRC(CPU, src)) {
            AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
        } else if (inRC(XMM, src) && inRC(CPU, dst)) {
            AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, Register src) {
        if (inRC(XMM, dst) && inRC(CPU, src)) {
            AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
        } else if (inRC(XMM, src) && inRC(CPU, dst)) {
            AMD64MROp.MOVD.emit(this, DWORD, dst, src);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, AMD64Address src) {
        AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
    }

    public final void movddup(Register dst, Register src) {
        assert supports(CPUFeature.SSE3);
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, SD, P_0F, false);
        emitByte(0x12);
        emitModRM(dst, src);
    }

    public final void movdqu(Register dst, AMD64Address src) {
        assert inRC(XMM, dst);
        simdPrefix(dst, Register.None, src, SS, P_0F, false);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdqu(Register dst, Register src) {
        assert inRC(XMM, dst) && inRC(XMM, src);
        simdPrefix(dst, Register.None, src, SS, P_0F, false);
        emitByte(0x6F);
        emitModRM(dst, src);
    }

    // Insn: VMOVDQU xmm2/m128, xmm1

    public final void movdqu(AMD64Address dst, Register src) {
        assert inRC(XMM, src);
        // Code: VEX.128.F3.0F.WIG 7F /r
        simdPrefix(src, Register.None, dst, SS, P_0F, false);
        emitByte(0x7F);
        emitOperandHelper(src, dst, 0);
    }

    public final void movslq(AMD64Address dst, int imm32) {
        prefixq(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movslq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x63);
        emitOperandHelper(dst, src, 0);
    }

    public final void movslq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x63);
        emitModRM(dst, src);
    }
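    // Sketch of the annotated-immediate machinery used by movl/movq above (value
    // illustrative): passing annotateImm=true records where the immediate lives so that
    // it can be patched later:
    //
    //   asm.movq(rax, 0xDEADBEEFL, true); // emits 48 B8 <imm64> and reports the
    //                                     // immediate's position and size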
    public final void negq(Register dst) {
        prefixq(dst);
        emitByte(0xF7);
        emitModRM(3, dst);
    }

    public final void orq(Register dst, Register src) {
        OR.rmOp.emit(this, QWORD, dst, src);
    }

    public final void shlq(Register dst, int imm8) {
        // A QWORD shift count may be up to 63, so check imm8 >> 1 against the 32-bit limit.
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(4, dst);
        } else {
            emitByte(0xC1);
            emitModRM(4, dst);
            emitByte(imm8);
        }
    }

    public final void shlq(Register dst) {
        // Multiply dst by 2, CL times.
        prefixq(dst);
        emitByte(0xD3);
        emitModRM(4, dst);
    }

    public final void shrq(Register dst, int imm8) {
        // A QWORD shift count may be up to 63, so check imm8 >> 1 against the 32-bit limit.
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefixq(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(5, dst);
        } else {
            emitByte(0xC1);
            emitModRM(5, dst);
            emitByte(imm8);
        }
    }

    public final void shrq(Register dst) {
        // Unsigned divide dst by 2, CL times.
        prefixq(dst);
        emitByte(0xD3);
        emitModRM(5, dst);
    }

    public final void sbbq(Register dst, Register src) {
        SBB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void subq(Register dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subq(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subqWide(Register dst, int imm32) {
        // Don't use the sign-extending byte-immediate version; force a full 32-bit immediate.
        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
    }

    public final void subq(Register dst, Register src) {
        SUB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void testq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x85);
        emitModRM(dst, src);
    }

    public final void btrq(Register src, int imm8) {
        prefixq(src);
        emitByte(0x0F);
        emitByte(0xBA);
        emitModRM(6, src);
        emitByte(imm8);
    }

    public final void xaddb(AMD64Address dst, Register src) {
        prefixb(dst, src);
        emitByte(0x0F);
        emitByte(0xC0);
        emitOperandHelper(src, dst, 0);
    }
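    /*
     * The xadd* overloads here implement fetch-and-add when preceded by the lock prefix. A
     * minimal usage sketch (hypothetical caller code):
     *
     *   asm.lock();
     *   asm.xaddl(counterAddress, valueReg); // atomically: tmp = [mem]; [mem] += reg; reg = tmp
     */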
    public final void xaddw(AMD64Address dst, Register src) {
        emitByte(0x66); // Operand-size override: switch to 16-bit operand size.
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddq(AMD64Address dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xchgb(Register dst, AMD64Address src) {
        prefixb(src, dst);
        emitByte(0x86);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgw(Register dst, AMD64Address src) {
        emitByte(0x66); // Operand-size override: switch to 16-bit operand size.
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void membar(int barriers) {
        if (target.isMP) {
            // We only have to handle StoreLoad.
            if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions, which suffice as barriers
                // and are much faster than the alternative of using the cpuid instruction.
                // We use a locked add of 0 to [rsp]; apart from clobbering the flags, this is
                // conveniently a no-op.
                // Any change to this code may require revisiting other places where this idiom
                // is used, in particular the orderAccess code.
                lock();
                addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here.
            }
        }
    }

    @Override
    protected final void patchJumpTarget(int branch, int branchTarget) {
        int op = getByte(branch);
        assert op == 0xE8 // call
                        || op == 0x00 // jump table entry
                        || op == 0xE9 // jmp
                        || op == 0xEB // short jmp
                        || (op & 0xF0) == 0x70 // short jcc
                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
                        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

        if (op == 0x00) {
            int offsetToJumpTableBase = getShort(branch + 1);
            int jumpTableBase = branch - offsetToJumpTableBase;
            int imm32 = branchTarget - jumpTableBase;
            emitInt(imm32, branch);
        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
            // Short offset operators (jmp and jcc).
            final int imm8 = branchTarget - (branch + 2);
            /*
             * A wrongly patched short branch can produce code that runs but misbehaves, so we
             * always fail with an exception here instead of relying on an assert.
             */
            if (!NumUtil.isByte(imm8)) {
                throw new InternalError("branch displacement out of range: " + imm8);
            }
            emitByte(imm8, branch + 1);
        } else {
            int off = 1;
            if (op == 0x0F) {
                off = 2;
            }
            int imm32 = branchTarget - (branch + 4 + off);
            emitInt(imm32, branch + off);
        }
    }

    public void nullCheck(AMD64Address address) {
        testl(AMD64.rax, address);
    }

    @Override
    public void align(int modulus) {
        if (position() % modulus != 0) {
            nop(modulus - (position() % modulus));
        }
    }
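    /*
     * Example for align (illustrative only): with position() == 13, align(16) emits three
     * bytes of nop padding so that the next instruction starts at position 16.
     */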
    /**
     * Emits a direct call instruction. Note that the actual call target is not specified,
     * because all calls need patching anyway. Therefore, 0 is emitted as the call target, and
     * the caller is responsible for adding the call address to the appropriate patching
     * tables.
     */
    public final void call() {
        annotatePatchingImmediate(1, 4);
        emitByte(0xE8);
        emitInt(0);
    }

    public final void call(Register src) {
        prefix(src);
        emitByte(0xFF);
        emitModRM(2, src);
    }

    public final void int3() {
        emitByte(0xCC);
    }

    public final void pause() {
        emitByte(0xF3);
        emitByte(0x90);
    }

    private void emitx87(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal stack offset";
        emitByte(b1);
        emitByte(b2 + i);
    }

    public final void fldd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(0, src, 0);
    }

    public final void flds(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(0, src, 0);
    }

    public final void fldln2() {
        emitByte(0xD9);
        emitByte(0xED);
    }

    public final void fldlg2() {
        emitByte(0xD9);
        emitByte(0xEC);
    }

    public final void fyl2x() {
        emitByte(0xD9);
        emitByte(0xF1);
    }

    public final void fstps(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(3, src, 0);
    }

    public final void fstpd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(3, src, 0);
    }

    private void emitFPUArith(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
        emitByte(b1);
        emitByte(b2 + i);
    }

    public void ffree(int i) {
        emitFPUArith(0xDD, 0xC0, i);
    }

    public void fincstp() {
        emitByte(0xD9);
        emitByte(0xF7);
    }

    public void fxch(int i) {
        emitFPUArith(0xD9, 0xC8, i);
    }

    public void fnstswAX() {
        emitByte(0xDF);
        emitByte(0xE0);
    }

    public void fwait() {
        emitByte(0x9B);
    }

    public void fprem() {
        emitByte(0xD9);
        emitByte(0xF8);
    }

    public final void fsin() {
        emitByte(0xD9);
        emitByte(0xFE);
    }

    public final void fcos() {
        emitByte(0xD9);
        emitByte(0xFF);
    }

    public final void fptan() {
        emitByte(0xD9);
        emitByte(0xF2);
    }

    public final void fstp(int i) {
        emitx87(0xDD, 0xD8, i);
    }

    @Override
    public AMD64Address makeAddress(Register base, int displacement) {
        return new AMD64Address(base, displacement);
    }

    @Override
    public AMD64Address getPlaceholder(int instructionStartPosition) {
        return new AMD64Address(AMD64.rip, Register.None, Scale.Times1, 0, instructionStartPosition);
    }

    private void prefetchPrefix(AMD64Address src) {
        prefix(src);
        emitByte(0x0F);
    }

    public void prefetchnta(AMD64Address src) {
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(0, src, 0);
    }

    void prefetchr(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(0, src, 0);
    }

    public void prefetcht0(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(1, src, 0);
    }
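    /*
     * The prefetch* methods share opcode 0F 18 (0F 0D for the AMD 3DNow! forms) and select the
     * locality hint via the ModRM reg field: /0 = NTA, /1 = T0, /2 = T1, /3 = T2.
     */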
    public void prefetcht1(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(2, src, 0);
    }

    public void prefetcht2(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(3, src, 0);
    }

    public void prefetchw(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(1, src, 0);
    }

    public void rdtsc() {
        emitByte(0x0F);
        emitByte(0x31);
    }

    /**
     * Emits an instruction that is guaranteed to raise an invalid-opcode fault. This is used
     * when we deliberately want to crash the program (e.g. for debugging).
     */
    public void illegal() {
        emitByte(0x0F);
        emitByte(0x0B);
    }

    public void lfence() {
        emitByte(0x0F);
        emitByte(0xAE);
        emitByte(0xE8);
    }

    public final void vptest(Register dst, Register src) {
        VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpxor(Register dst, Register nds, Register src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vpxor(Register dst, Register nds, AMD64Address src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vmovdqu(AMD64Address dst, Register src) {
        assert inRC(XMM, src);
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX2);
        VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vzeroupper() {
        emitVEX(L128, P_, M_0F, W0, 0, 0, true);
        emitByte(0x77);
    }

    // Insn: KORTESTD k1, k2

    // Sets ZF if the OR of the two masks is all zeros, and CF if all 32 bits are ones.
    public final void kortestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }

    // Insn: KORTESTQ k1, k2

    // Sets ZF if the OR of the two masks is all zeros, and CF if all 64 bits are ones.
    public final void kortestq(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }
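    /*
     * The kmovd/kmovq overloads below dispatch on the operand register classes: the
     * mask <- mask, mask <- gpr and gpr <- mask forms share the 0x90/0x92/0x93 opcode group,
     * so one Java method covers the three instruction forms. For example, kmovd(k1, k2) (both
     * mask registers) selects the 0x90 encoding.
     */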
    public final void kmovd(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovd(KRegister dst, KRegister src):
                // Insn: KMOVD k1, k2/m32
                // Code: VEX.L0.66.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovd(KRegister dst, Register src)
                // Insn: KMOVD k1, r32
                // Code: VEX.L0.F2.0F.W0 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovd(Register dst, KRegister src)
                // Insn: KMOVD r32, k1
                // Code: VEX.L0.F2.0F.W0 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    public final void kmovq(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovq(KRegister dst, KRegister src):
                // Insn: KMOVQ k1, k2/m64
                // Code: VEX.L0.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovq(KRegister dst, Register src)
                // Insn: KMOVQ k1, r64
                // Code: VEX.L0.F2.0F.W1 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovq(Register dst, KRegister src)
                // Insn: KMOVQ r64, k1
                // Code: VEX.L0.F2.0F.W1 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    // Insn: KTESTD k1, k2

    public final void ktestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 99 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x99);
        emitModRM(src1, src2);
    }

    public final void evmovdqu64(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512F);
        assert inRC(XMM, dst);
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1, m256

    public final void evpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(XMM, nds);
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
        emitByte(0x74);
        emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
    // -----
    // Insn: VMOVDQU16 zmm1, m512

    public final void evmovdqu16(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
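    /*
     * EVEX-encoded memory operands use the compressed disp8*N addressing form: a one-byte
     * displacement is implicitly scaled by the tuple size N passed as the last argument to
     * emitEVEXOperandHelper. For a full-vector (FVM) ZMM access N is 64, so a displacement of
     * 128 can still be encoded in a single byte as disp8 = 2.
     */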
    // Insn: VMOVDQU16 zmm1 {k1}{z}, m512

    public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(MASK, mask);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
    // -----
    // Insn: VMOVDQU16 m512, zmm1

    public final void evmovdqu16(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 m512, k1, zmm1

    public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
    // -----
    // Insn: VPBROADCASTW zmm1, reg

    public final void evpbroadcastw(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(CPU, src);
        // Code: EVEX.512.66.0F38.W0 7B /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
        emitByte(0x7B);
        emitModRM(dst, src);
    }

    // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
    // -----
    // Insn: VPCMPUW k1, zmm2, zmm3, imm8

    public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src);
        // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
        emitByte(0x3E);
        emitModRM(kdst, src);
        emitByte(vcc);
    }

    // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
    // -----
    // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8

    public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(MASK, mask);
        assert inRC(XMM, nds) && inRC(XMM, src);
        // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
        evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
        emitByte(0x3E);
        emitModRM(kdst, src);
        emitByte(vcc);
    }

    // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
    // -----
    // Insn: VPMOVWB m256, zmm2

    public final void evpmovwb(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
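    /*
     * Note on the EVEX Z bit above: the masked load form of evmovdqu16 passes Z1
     * (zeroing-masking: elements not selected by the mask are cleared), while the masked store
     * forms pass Z0, since zeroing-masking is not permitted with a memory destination;
     * masked-off elements of memory are simply left unmodified.
     */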
    // Insn: VPMOVWB m256, k1, zmm2

    public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
    // -----
    // Insn: VPMOVZXBW zmm1, k1, m256

    public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
}