
src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java

rev 52509 : [mq]: graal

*** 41,50 **** --- 41,51 ---- import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC; import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG; import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT; import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0; import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0; + import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1; import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE; import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD; import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD; import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS; import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
*** 1021,1031 **** } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) { assert assertion.check((AMD64) asm.target.arch, size, dst, null, src); assert op != 0x1A && op != 0x5A; ! asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w); asm.emitByte(op); asm.emitModRM(dst, src); } } --- 1022,1032 ---- } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) { assert assertion.check((AMD64) asm.target.arch, size, dst, null, src); assert op != 0x1A && op != 0x5A; ! asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitModRM(dst, src); } }
*** 1082,1092 **** super(opcode, pp, mmmmm, w, op, assertion); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) { assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); ! asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w); asm.emitByte(op); asm.emitOperandHelper(dst, src, 0); } } --- 1083,1093 ---- super(opcode, pp, mmmmm, w, op, assertion); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) { assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); ! asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitOperandHelper(dst, src, 0); } }
*** 1121,1138 **** this.opReverse = opReverse; } public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) { assert assertion.check((AMD64) asm.target.arch, size, src, null, null); ! asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w); asm.emitByte(opReverse); asm.emitOperandHelper(src, dst, 0); } public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) { assert assertion.check((AMD64) asm.target.arch, size, src, null, dst); ! asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w); asm.emitByte(opReverse); asm.emitModRM(src, dst); } } --- 1122,1139 ---- this.opReverse = opReverse; } public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) { assert assertion.check((AMD64) asm.target.arch, size, src, null, null); ! asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false); asm.emitByte(opReverse); asm.emitOperandHelper(src, dst, 0); } public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) { assert assertion.check((AMD64) asm.target.arch, size, src, null, dst); ! asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false); asm.emitByte(opReverse); asm.emitModRM(src, dst); } }
*** 1156,1174 **** } @Override public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, dst, null, src); ! asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w); asm.emitByte(op); asm.emitModRM(dst, src); asm.emitByte(imm8); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); ! asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w); asm.emitByte(op); asm.emitOperandHelper(dst, src, 1); asm.emitByte(imm8); } } --- 1157,1175 ---- } @Override public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, dst, null, src); ! asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitModRM(dst, src); asm.emitByte(imm8); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); ! asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitOperandHelper(dst, src, 1); asm.emitByte(imm8); } }
*** 1191,1209 **** } @Override public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, src, null, dst); ! asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w); asm.emitByte(op); asm.emitModRM(src, dst); asm.emitByte(imm8); } public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, src, null, null); ! asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w); asm.emitByte(op); asm.emitOperandHelper(src, dst, 1); asm.emitByte(imm8); } } --- 1192,1210 ---- } @Override public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, src, null, dst); ! asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitModRM(src, dst); asm.emitByte(imm8); } public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, src, null, null); ! asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitOperandHelper(src, dst, 1); asm.emitByte(imm8); } }
*** 1222,1240 **** super(opcode, pp, mmmmm, w, op, assertion); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); asm.emitByte(op); asm.emitModRM(dst, src2); asm.emitByte(mask.encoding() << 4); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) { assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 0); asm.emitByte(mask.encoding() << 4); } } --- 1223,1241 ---- super(opcode, pp, mmmmm, w, op, assertion); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitModRM(dst, src2); asm.emitByte(mask.encoding() << 4); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) { assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 0); asm.emitByte(mask.encoding() << 4); } }
*** 1318,1341 **** super(opcode, pp, mmmmm, w, op, assertion); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); asm.emitByte(op); asm.emitModRM(dst, src2); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 0); } } ! public static final class VexGeneralPurposeRVMOp extends VexOp { // @formatter:off public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1); public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2); public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); --- 1319,1342 ---- super(opcode, pp, mmmmm, w, op, assertion); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitModRM(dst, src2); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 0); } } ! public static final class VexGeneralPurposeRVMOp extends VexRVMOp { // @formatter:off public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1); public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2); public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
*** 1343,1364 **** private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { super(opcode, pp, mmmmm, w, op, assertion); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null); assert size == AVXSize.DWORD || size == AVXSize.QWORD; ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1); asm.emitByte(op); asm.emitModRM(dst, src2); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null); assert size == AVXSize.DWORD || size == AVXSize.QWORD; ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 0); } } --- 1344,1367 ---- private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { super(opcode, pp, mmmmm, w, op, assertion); } + @Override public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null); assert size == AVXSize.DWORD || size == AVXSize.QWORD; ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); asm.emitByte(op); asm.emitModRM(dst, src2); } + @Override public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null); assert size == AVXSize.DWORD || size == AVXSize.QWORD; ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 0); } }
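Aside: the BMI ops above derive the VEX.W bit from the requested operand size rather than from the opcode table, so a caller selects 32- vs 64-bit semantics via AVXSize. A minimal usage sketch (the register choices are illustrative, not part of this change):

    // rax = ~rbx & rcx (BMI1 ANDN); QWORD selects W1.
    VexGeneralPurposeRVMOp.ANDN.emit(asm, AVXSize.QWORD, AMD64.rax, AMD64.rbx, AMD64.rcx);
    // rax = bits of rbx gathered at positions set in rcx (BMI2 PEXT); DWORD selects W0.
    VexGeneralPurposeRVMOp.PEXT.emit(asm, AVXSize.DWORD, AMD64.rax, AMD64.rbx, AMD64.rcx);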
*** 1376,1399 **** } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null); assert size == AVXSize.DWORD || size == AVXSize.QWORD; ! asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1); asm.emitByte(op); asm.emitModRM(dst, src1); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null); assert size == AVXSize.DWORD || size == AVXSize.QWORD; ! asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1); asm.emitByte(op); asm.emitOperandHelper(dst, src1, 0); } } /** * VEX-encoded shift instructions with an operand order of either RVM or VMI. */ public static final class VexShiftOp extends VexRVMOp implements VexRRIOp { // @formatter:off --- 1379,1432 ---- } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null); assert size == AVXSize.DWORD || size == AVXSize.QWORD; ! asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); asm.emitByte(op); asm.emitModRM(dst, src1); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) { assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null); assert size == AVXSize.DWORD || size == AVXSize.QWORD; ! asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); asm.emitByte(op); asm.emitOperandHelper(dst, src1, 0); } } + public static final class VexGeneralPurposeRMOp extends VexRMOp { + // @formatter:off + public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1); + public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1); + public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1); + // @formatter:on + private final int ext; + + private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) { + super(opcode, pp, mmmmm, w, op, assertion); + this.ext = ext; + } + + @Override + public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) { + assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); + asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); + asm.emitByte(op); + asm.emitModRM(ext, src); + } + + @Override + public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) { + assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); + asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); + asm.emitByte(op); + asm.emitOperandHelper(ext, src, 0); + } + } + /** * VEX-encoded shift instructions with an operand order of either RVM or VMI. */ public static final class VexShiftOp extends VexRVMOp implements VexRRIOp { // @formatter:off
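The new VexGeneralPurposeRMOp class covers the BLS* group, which repurposes the ModRM reg field as an opcode extension (/1../3, passed as AMD64.cpuRegisters[ext]) while the real destination travels in VEX.vvvv. A hedged usage sketch (illustrative registers):

    // rax = rcx & (rcx - 1): clear the lowest set bit (BLSR, /1).
    VexGeneralPurposeRMOp.BLSR.emit(asm, AVXSize.QWORD, AMD64.rax, AMD64.rcx);
    // rax = rcx & -rcx: isolate the lowest set bit (BLSI, /3).
    VexGeneralPurposeRMOp.BLSI.emit(asm, AVXSize.QWORD, AMD64.rax, AMD64.rcx);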
*** 1417,1427 **** } @Override public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, null, dst, src); ! asm.vexPrefix(null, dst, src, size, pp, mmmmm, w); asm.emitByte(immOp); asm.emitModRM(r, src); asm.emitByte(imm8); } } --- 1450,1460 ---- } @Override public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, null, dst, src); ! asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false); asm.emitByte(immOp); asm.emitModRM(r, src); asm.emitByte(imm8); } }
*** 1445,1462 **** this.opReverse = opReverse; } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) { assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null); ! asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w); asm.emitByte(op); asm.emitOperandHelper(dst, src, 0); } public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) { assert assertion.check((AMD64) asm.target.arch, size, src, mask, null); ! asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w); asm.emitByte(opReverse); asm.emitOperandHelper(src, dst, 0); } } --- 1478,1495 ---- this.opReverse = opReverse; } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) { assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null); ! asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitOperandHelper(dst, src, 0); } public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) { assert assertion.check((AMD64) asm.target.arch, size, src, mask, null); ! asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false); asm.emitByte(opReverse); asm.emitOperandHelper(src, dst, 0); } }
*** 1480,1499 **** } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); assert (imm8 & 0xFF) == imm8; ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); asm.emitByte(op); asm.emitModRM(dst, src2); asm.emitByte(imm8); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); assert (imm8 & 0xFF) == imm8; ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 1); asm.emitByte(imm8); } } --- 1513,1532 ---- } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); assert (imm8 & 0xFF) == imm8; ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitModRM(dst, src2); asm.emitByte(imm8); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); assert (imm8 & 0xFF) == imm8; ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 1); asm.emitByte(imm8); } }
*** 1593,1611 **** super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); asm.emitByte(op); asm.emitModRM(dst, src2); asm.emitByte(p.imm8); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 1); asm.emitByte(p.imm8); } } --- 1626,1644 ---- super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitModRM(dst, src2); asm.emitByte(p.imm8); } public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) { assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); ! asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); asm.emitByte(op); asm.emitOperandHelper(dst, src2, 1); asm.emitByte(p.imm8); } }
*** 1941,1958 **** public final void lock() { emitByte(0xF0); } public final void movapd(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0x28); emitModRM(dst, src); } public final void movaps(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PS, P_0F, false); emitByte(0x28); emitModRM(dst, src); } --- 1974,1991 ---- public final void lock() { emitByte(0xF0); } public final void movapd(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0x28); emitModRM(dst, src); } public final void movaps(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PS, P_0F, false); emitByte(0x28); emitModRM(dst, src); }
*** 1962,1972 **** emitOperandHelper(0, dst, 1); emitByte(imm8); } public final void movb(AMD64Address dst, Register src) { ! assert src.getRegisterCategory().equals(CPU) : "must have byte register"; prefixb(dst, src); emitByte(0x88); emitOperandHelper(src, dst, 0); } --- 1995,2005 ---- emitOperandHelper(0, dst, 1); emitByte(imm8); } public final void movb(AMD64Address dst, Register src) { ! assert inRC(CPU, src) : "must have byte register"; prefixb(dst, src); emitByte(0x88); emitOperandHelper(src, dst, 0); }
*** 2025,2073 **** * memory. But for old Opteron use movlpd instead of movsd. The selection is done in * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and * {@link AMD64MacroAssembler#movflt(Register, Register)}. */ public final void movlpd(Register dst, AMD64Address src) { ! assert dst.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x12); emitOperandHelper(dst, src, 0); } public final void movlhps(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, src, src, PS, P_0F, false); emitByte(0x16); emitModRM(dst, src); } public final void movq(Register dst, AMD64Address src) { movq(dst, src, false); } ! public final void movq(Register dst, AMD64Address src, boolean wide) { ! if (dst.getRegisterCategory().equals(XMM)) { simdPrefix(dst, Register.None, src, SS, P_0F, false); emitByte(0x7E); ! emitOperandHelper(dst, src, wide, 0); } else { // gpr version of movq prefixq(src, dst); emitByte(0x8B); ! emitOperandHelper(dst, src, wide, 0); } } public final void movq(Register dst, Register src) { prefixq(dst, src); emitByte(0x8B); emitModRM(dst, src); } public final void movq(AMD64Address dst, Register src) { ! if (src.getRegisterCategory().equals(XMM)) { ! simdPrefix(src, Register.None, dst, PD, P_0F, true); emitByte(0xD6); emitOperandHelper(src, dst, 0); } else { // gpr version of movq prefixq(dst, src); --- 2058,2117 ---- * memory. But for old Opteron use movlpd instead of movsd. The selection is done in * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and * {@link AMD64MacroAssembler#movflt(Register, Register)}. */ public final void movlpd(Register dst, AMD64Address src) { ! assert inRC(XMM, dst); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x12); emitOperandHelper(dst, src, 0); } public final void movlhps(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, src, src, PS, P_0F, false); emitByte(0x16); emitModRM(dst, src); } public final void movq(Register dst, AMD64Address src) { movq(dst, src, false); } ! public final void movq(Register dst, AMD64Address src, boolean force4BytesDisplacement) { ! if (inRC(XMM, dst)) { ! // Insn: MOVQ xmm, r/m64 ! // Code: F3 0F 7E /r ! // An alternative instruction would be 66 REX.W 0F 6E /r. We prefer the REX.W free ! // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction ! // when applicable. simdPrefix(dst, Register.None, src, SS, P_0F, false); emitByte(0x7E); ! emitOperandHelper(dst, src, force4BytesDisplacement, 0); } else { // gpr version of movq prefixq(src, dst); emitByte(0x8B); ! emitOperandHelper(dst, src, force4BytesDisplacement, 0); } } public final void movq(Register dst, Register src) { + assert inRC(CPU, dst) && inRC(CPU, src); prefixq(dst, src); emitByte(0x8B); emitModRM(dst, src); } public final void movq(AMD64Address dst, Register src) { ! if (inRC(XMM, src)) { ! // Insn: MOVQ r/m64, xmm ! // Code: 66 0F D6 /r ! // An alternative instruction would be 66 REX.W 0F 7E /r. We prefer the REX.W free ! // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction ! // when applicable. ! simdPrefix(src, Register.None, dst, PD, P_0F, false); emitByte(0xD6); emitOperandHelper(src, dst, 0); } else { // gpr version of movq prefixq(dst, src);
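To make the REX.W-free preference in the new movq comments concrete, here are hand-assembled bytes for a load of xmm0 from [rax] (a sketch for illustration, not output captured from this change):

    // F3 0F 7E 00      movq xmm0, qword ptr [rax]  -- form emitted here, no REX.W needed
    // 66 48 0F 6E 00   movq xmm0, qword ptr [rax]  -- 66 REX.W 0F 6E /r alternative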
*** 2424,2433 **** --- 2468,2489 ---- public final void orl(Register dst, int imm32) { OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); } + // Insn: VPACKUSWB xmm1, xmm2, xmm3/m128 + // ----- + // Insn: VPACKUSWB xmm1, xmm1, xmm2 + + public final void packuswb(Register dst, Register src) { + assert inRC(XMM, dst) && inRC(XMM, src); + // Code: VEX.NDS.128.66.0F.WIG 67 /r + simdPrefix(dst, dst, src, PD, P_0F, false); + emitByte(0x67); + emitModRM(dst, src); + } + public final void pop(Register dst) { prefix(dst); emitByte(0x58 + encode(dst)); }
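Usage note on the new packuswb: it narrows the eight words of each operand to unsigned-saturated bytes, with dst supplying the low eight result bytes and src the high eight; hence the VPACKUSWB xmm1, xmm1, xmm2 form noted above. Illustrative call (register choices are assumptions):

    asm.packuswb(AMD64.xmm0, AMD64.xmm1); // result bytes 0-7 from xmm0's words, 8-15 from xmm1's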
*** 2435,2663 **** emitByte(0x9D); } public final void ptest(Register dst, Register src) { assert supports(CPUFeature.SSE4_1); ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PD, P_0F38, false); emitByte(0x17); emitModRM(dst, src); } public final void pcmpeqb(Register dst, Register src) { assert supports(CPUFeature.SSE2); ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x74); emitModRM(dst, src); } public final void pcmpeqw(Register dst, Register src) { assert supports(CPUFeature.SSE2); ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x75); emitModRM(dst, src); } public final void pcmpestri(Register dst, AMD64Address src, int imm8) { assert supports(CPUFeature.SSE4_2); ! assert dst.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PD, P_0F3A, false); emitByte(0x61); emitOperandHelper(dst, src, 0); emitByte(imm8); } public final void pcmpestri(Register dst, Register src, int imm8) { assert supports(CPUFeature.SSE4_2); ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PD, P_0F3A, false); emitByte(0x61); emitModRM(dst, src); emitByte(imm8); } public final void pmovmskb(Register dst, Register src) { assert supports(CPUFeature.SSE2); ! assert dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0xD7); emitModRM(dst, src); } public final void pmovzxbw(Register dst, AMD64Address src) { ! assert supports(CPUFeature.SSE4_2); ! assert dst.getRegisterCategory().equals(XMM); ! // XXX legacy_mode should be: _legacy_mode_bw simdPrefix(dst, Register.None, src, PD, P_0F38, false); emitByte(0x30); emitOperandHelper(dst, src, 0); } public final void push(Register src) { prefix(src); emitByte(0x50 + encode(src)); } public void pushfq() { emitByte(0x9c); } public final void paddd(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xFE); emitModRM(dst, src); } public final void paddq(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xD4); emitModRM(dst, src); } public final void pextrw(Register dst, Register src, int imm8) { ! assert dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0xC5); emitModRM(dst, src); emitByte(imm8); } public final void pinsrw(Register dst, Register src, int imm8) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xC4); emitModRM(dst, src); emitByte(imm8); } public final void por(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xEB); emitModRM(dst, src); } public final void pand(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xDB); emitModRM(dst, src); } public final void pxor(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xEF); emitModRM(dst, src); } public final void pslld(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert dst.getRegisterCategory().equals(XMM); // XMM6 is for /6 encoding: 66 0F 72 /6 ib simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); emitByte(0x72); emitModRM(6, dst); emitByte(imm8 & 0xFF); } public final void psllq(Register dst, Register shift) { ! assert dst.getRegisterCategory().equals(XMM) && shift.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, shift, PD, P_0F, false); emitByte(0xF3); emitModRM(dst, shift); } public final void psllq(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert dst.getRegisterCategory().equals(XMM); // XMM6 is for /6 encoding: 66 0F 73 /6 ib simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); emitByte(0x73); emitModRM(6, dst); emitByte(imm8); } public final void psrad(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert dst.getRegisterCategory().equals(XMM); // XMM4 is for /4 encoding: 66 0F 72 /4 ib simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false); emitByte(0x72); emitModRM(4, dst); emitByte(imm8); } public final void psrld(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert dst.getRegisterCategory().equals(XMM); // XMM2 is for /2 encoding: 66 0F 72 /2 ib simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); emitByte(0x72); emitModRM(2, dst); emitByte(imm8); } public final void psrlq(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert dst.getRegisterCategory().equals(XMM); // XMM2 is for /2 encoding: 66 0F 73 /2 ib simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); emitByte(0x73); emitModRM(2, dst); emitByte(imm8); } public final void psrldq(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert dst.getRegisterCategory().equals(XMM); simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false); emitByte(0x73); emitModRM(3, dst); emitByte(imm8); } public final void pshufb(Register dst, Register src) { assert supports(CPUFeature.SSSE3); ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F38, false); emitByte(0x00); emitModRM(dst, src); } public final void pshuflw(Register dst, Register src, int imm8) { assert supports(CPUFeature.SSE2); assert isUByte(imm8) : "invalid value"; ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, SD, P_0F, false); emitByte(0x70); emitModRM(dst, src); emitByte(imm8); } public final void pshufd(Register dst, Register src, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0x70); emitModRM(dst, src); emitByte(imm8); } public final void psubd(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xFA); emitModRM(dst, src); } public final void rcpps(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PS, P_0F, false); emitByte(0x53); emitModRM(dst, src); } --- 2491,2744 ---- emitByte(0x9D); } public final void ptest(Register dst, Register src) { assert supports(CPUFeature.SSE4_1); ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PD, P_0F38, false); emitByte(0x17); emitModRM(dst, src); } public final void pcmpeqb(Register dst, Register src) { assert supports(CPUFeature.SSE2); ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x74); emitModRM(dst, src); } public final void pcmpeqw(Register dst, Register src) { assert supports(CPUFeature.SSE2); ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x75); emitModRM(dst, src); } + public final void pcmpeqd(Register dst, Register src) { + assert supports(CPUFeature.SSE2); + assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); + simdPrefix(dst, dst, src, PD, P_0F, false); + emitByte(0x76); + emitModRM(dst, src); + } + public final void pcmpestri(Register dst, AMD64Address src, int imm8) { assert supports(CPUFeature.SSE4_2); ! assert inRC(XMM, dst); simdPrefix(dst, Register.None, src, PD, P_0F3A, false); emitByte(0x61); emitOperandHelper(dst, src, 0); emitByte(imm8); } public final void pcmpestri(Register dst, Register src, int imm8) { assert supports(CPUFeature.SSE4_2); ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PD, P_0F3A, false); emitByte(0x61); emitModRM(dst, src); emitByte(imm8); } public final void pmovmskb(Register dst, Register src) { assert supports(CPUFeature.SSE2); ! assert inRC(CPU, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0xD7); emitModRM(dst, src); } + // Insn: VPMOVZXBW xmm1, xmm2/m64 + public final void pmovzxbw(Register dst, AMD64Address src) { ! assert supports(CPUFeature.SSE4_1); ! assert inRC(XMM, dst); simdPrefix(dst, Register.None, src, PD, P_0F38, false); emitByte(0x30); emitOperandHelper(dst, src, 0); } + public final void pmovzxbw(Register dst, Register src) { + assert supports(CPUFeature.SSE4_1); + assert inRC(XMM, dst) && inRC(XMM, src); + simdPrefix(dst, Register.None, src, PD, P_0F38, false); + emitByte(0x30); + emitModRM(dst, src); + } + public final void push(Register src) { prefix(src); emitByte(0x50 + encode(src)); } public void pushfq() { emitByte(0x9c); } public final void paddd(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xFE); emitModRM(dst, src); } public final void paddq(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xD4); emitModRM(dst, src); } public final void pextrw(Register dst, Register src, int imm8) { ! assert inRC(CPU, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0xC5); emitModRM(dst, src); emitByte(imm8); } public final void pinsrw(Register dst, Register src, int imm8) { ! assert inRC(XMM, dst) && inRC(CPU, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xC4); emitModRM(dst, src); emitByte(imm8); } public final void por(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xEB); emitModRM(dst, src); } public final void pand(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xDB); emitModRM(dst, src); } public final void pxor(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xEF); emitModRM(dst, src); } public final void pslld(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert inRC(XMM, dst); // XMM6 is for /6 encoding: 66 0F 72 /6 ib simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); emitByte(0x72); emitModRM(6, dst); emitByte(imm8 & 0xFF); } public final void psllq(Register dst, Register shift) { ! assert inRC(XMM, dst) && inRC(XMM, shift); simdPrefix(dst, dst, shift, PD, P_0F, false); emitByte(0xF3); emitModRM(dst, shift); } public final void psllq(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert inRC(XMM, dst); // XMM6 is for /6 encoding: 66 0F 73 /6 ib simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); emitByte(0x73); emitModRM(6, dst); emitByte(imm8); } public final void psrad(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert inRC(XMM, dst); // XMM4 is for /4 encoding: 66 0F 72 /4 ib simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false); emitByte(0x72); emitModRM(4, dst); emitByte(imm8); } public final void psrld(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert inRC(XMM, dst); // XMM2 is for /2 encoding: 66 0F 72 /2 ib simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); emitByte(0x72); emitModRM(2, dst); emitByte(imm8); } public final void psrlq(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert inRC(XMM, dst); // XMM2 is for /2 encoding: 66 0F 73 /2 ib simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); emitByte(0x73); emitModRM(2, dst); emitByte(imm8); } public final void psrldq(Register dst, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert inRC(XMM, dst); simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false); emitByte(0x73); emitModRM(3, dst); emitByte(imm8); } public final void pshufb(Register dst, Register src) { assert supports(CPUFeature.SSSE3); ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F38, false); emitByte(0x00); emitModRM(dst, src); } public final void pshuflw(Register dst, Register src, int imm8) { assert supports(CPUFeature.SSE2); assert isUByte(imm8) : "invalid value"; ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, SD, P_0F, false); emitByte(0x70); emitModRM(dst, src); emitByte(imm8); } public final void pshufd(Register dst, Register src, int imm8) { assert isUByte(imm8) : "invalid value"; ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0x70); emitModRM(dst, src); emitByte(imm8); } public final void psubd(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0xFA); emitModRM(dst, src); } + public final void punpcklbw(Register dst, Register src) { + assert supports(CPUFeature.SSE2); + assert inRC(XMM, dst) && inRC(XMM, src); + simdPrefix(dst, dst, src, PD, P_0F, false); + emitByte(0x60); + emitModRM(dst, src); + } + public final void rcpps(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PS, P_0F, false); emitByte(0x53); emitModRM(dst, src); }
*** 2701,2710 **** --- 2782,2797 ---- prefix(dst); emitByte(0xD3); emitModRM(4, dst); } + // Insn: SHLX r32a, r/m32, r32b + + public final void shlxl(Register dst, Register src1, Register src2) { + VexGeneralPurposeRMVOp.SHLX.emit(this, AVXSize.DWORD, dst, src1, src2); + } + public final void shrl(Register dst, int imm8) { assert isShiftCount(imm8 >> 1) : "illegal shift count"; prefix(dst); emitByte(0xC1); emitModRM(5, dst);
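Unlike SHL, SHLX takes its shift count from a third register and leaves EFLAGS untouched, which is what makes a dedicated helper worthwhile. A minimal sketch (illustrative registers):

    // eax = ebx << (ecx & 31), flags unmodified (BMI2 SHLX).
    asm.shlxl(AMD64.rax, AMD64.rbx, AMD64.rcx);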
*** 2767,2784 **** emitByte(0x85); emitOperandHelper(dst, src, 0); } public final void unpckhpd(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x15); emitModRM(dst, src); } public final void unpcklpd(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x14); emitModRM(dst, src); } --- 2854,2871 ---- emitByte(0x85); emitOperandHelper(dst, src, 0); } public final void unpckhpd(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x15); emitModRM(dst, src); } public final void unpcklpd(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, dst, src, PD, P_0F, false); emitByte(0x14); emitModRM(dst, src); }
*** 2885,2895 **** emitByte(0xB1); emitOperandHelper(reg, adr, 0); } public final void cvtdq2pd(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, SS, P_0F, false); emitByte(0xE6); emitModRM(dst, src); } --- 2972,2982 ---- emitByte(0xB1); emitOperandHelper(reg, adr, 0); } public final void cvtdq2pd(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, SS, P_0F, false); emitByte(0xE6); emitModRM(dst, src); }
*** 2900,2917 **** public final void cvttsd2siq(Register dst, Register src) { SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src); } public final void cvttpd2dq(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0xE6); emitModRM(dst, src); } public final void decq(Register dst) { ! // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) prefixq(dst); emitByte(0xFF); emitModRM(1, dst); } --- 2987,3004 ---- public final void cvttsd2siq(Register dst, Register src) { SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src); } public final void cvttpd2dq(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PD, P_0F, false); emitByte(0xE6); emitModRM(dst, src); } public final void decq(Register dst) { ! // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) prefixq(dst); emitByte(0xFF); emitModRM(1, dst); }
*** 2968,2990 **** public final void movdq(AMD64Address dst, Register src) { AMD64MROp.MOVQ.emit(this, QWORD, dst, src); } public final void movdq(Register dst, Register src) { ! if (dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU)) { AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); ! } else if (src.getRegisterCategory().equals(XMM) && dst.getRegisterCategory().equals(CPU)) { AMD64MROp.MOVQ.emit(this, QWORD, dst, src); } else { throw new InternalError("should not reach here"); } } public final void movdl(Register dst, Register src) { ! if (dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU)) { AMD64RMOp.MOVD.emit(this, DWORD, dst, src); ! } else if (src.getRegisterCategory().equals(XMM) && dst.getRegisterCategory().equals(CPU)) { AMD64MROp.MOVD.emit(this, DWORD, dst, src); } else { throw new InternalError("should not reach here"); } } --- 3055,3077 ---- public final void movdq(AMD64Address dst, Register src) { AMD64MROp.MOVQ.emit(this, QWORD, dst, src); } public final void movdq(Register dst, Register src) { ! if (inRC(XMM, dst) && inRC(CPU, src)) { AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); ! } else if (inRC(XMM, src) && inRC(CPU, dst)) { AMD64MROp.MOVQ.emit(this, QWORD, dst, src); } else { throw new InternalError("should not reach here"); } } public final void movdl(Register dst, Register src) { ! if (inRC(XMM, dst) && inRC(CPU, src)) { AMD64RMOp.MOVD.emit(this, DWORD, dst, src); ! } else if (inRC(XMM, src) && inRC(CPU, dst)) { AMD64MROp.MOVD.emit(this, DWORD, dst, src); } else { throw new InternalError("should not reach here"); } }
*** 2993,3022 **** AMD64RMOp.MOVD.emit(this, DWORD, dst, src); } public final void movddup(Register dst, Register src) { assert supports(CPUFeature.SSE3); ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, SD, P_0F, false); emitByte(0x12); emitModRM(dst, src); } public final void movdqu(Register dst, AMD64Address src) { ! assert dst.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, SS, P_0F, false); emitByte(0x6F); emitOperandHelper(dst, src, 0); } public final void movdqu(Register dst, Register src) { ! assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); simdPrefix(dst, Register.None, src, SS, P_0F, false); emitByte(0x6F); emitModRM(dst, src); } public final void movslq(AMD64Address dst, int imm32) { prefixq(dst); emitByte(0xC7); emitOperandHelper(0, dst, 4); emitInt(imm32); --- 3080,3119 ---- AMD64RMOp.MOVD.emit(this, DWORD, dst, src); } public final void movddup(Register dst, Register src) { assert supports(CPUFeature.SSE3); ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, SD, P_0F, false); emitByte(0x12); emitModRM(dst, src); } public final void movdqu(Register dst, AMD64Address src) { ! assert inRC(XMM, dst); simdPrefix(dst, Register.None, src, SS, P_0F, false); emitByte(0x6F); emitOperandHelper(dst, src, 0); } public final void movdqu(Register dst, Register src) { ! assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, SS, P_0F, false); emitByte(0x6F); emitModRM(dst, src); } + // Insn: VMOVDQU xmm2/m128, xmm1 + + public final void movdqu(AMD64Address dst, Register src) { + assert inRC(XMM, src); + // Code: VEX.128.F3.0F.WIG 7F /r + simdPrefix(src, Register.None, dst, SS, P_0F, false); + emitByte(0x7F); + emitOperandHelper(src, dst, 0); + } + public final void movslq(AMD64Address dst, int imm32) { prefixq(dst); emitByte(0xC7); emitOperandHelper(0, dst, 4); emitInt(imm32);
*** 3193,3204 **** @Override protected final void patchJumpTarget(int branch, int branchTarget) { int op = getByte(branch); assert op == 0xE8 // call ! || ! op == 0x00 // jump table entry || op == 0xE9 // jmp || op == 0xEB // short jmp || (op & 0xF0) == 0x70 // short jcc || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op; --- 3290,3300 ---- @Override protected final void patchJumpTarget(int branch, int branchTarget) { int op = getByte(branch); assert op == 0xE8 // call ! || op == 0x00 // jump table entry || op == 0xE9 // jmp || op == 0xEB // short jmp || (op & 0xF0) == 0x70 // short jcc || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
*** 3455,3531 **** public final void vmovdqu(Register dst, AMD64Address src) { VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src); } public final void vpmovzxbw(Register dst, AMD64Address src) { VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src); } public final void vzeroupper() { ! emitVEX(L128, P_, M_0F, W0, 0, 0); emitByte(0x77); } // This instruction produces ZF or CF flags public final void kortestq(Register src1, Register src2) { assert supports(CPUFeature.AVX512BW); ! assert src1.getRegisterCategory().equals(MASK) && src2.getRegisterCategory().equals(MASK); ! vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1); emitByte(0x98); emitModRM(src1, src2); } public final void kmovq(Register dst, Register src) { assert supports(CPUFeature.AVX512BW); ! assert dst.getRegisterCategory().equals(MASK) || dst.getRegisterCategory().equals(CPU); ! assert src.getRegisterCategory().equals(MASK) || src.getRegisterCategory().equals(CPU); ! assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU)); ! ! if (dst.getRegisterCategory().equals(MASK)) { ! if (src.getRegisterCategory().equals(MASK)) { ! // kmovq(KRegister dst, KRegister src) ! vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1); emitByte(0x90); emitModRM(dst, src); } else { // kmovq(KRegister dst, Register src) ! vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1); emitByte(0x92); emitModRM(dst, src); } } else { ! if (src.getRegisterCategory().equals(MASK)) { // kmovq(Register dst, KRegister src) ! vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1); emitByte(0x93); emitModRM(dst, src); } else { throw GraalError.shouldNotReachHere(); } } } public final void evmovdqu64(Register dst, AMD64Address src) { assert supports(CPUFeature.AVX512F); ! assert dst.getRegisterCategory().equals(XMM); evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0); emitByte(0x6F); emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); } public final void evpmovzxbw(Register dst, AMD64Address src) { assert supports(CPUFeature.AVX512BW); ! assert dst.getRegisterCategory().equals(XMM); evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0); emitByte(0x30); emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); } public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) { assert supports(CPUFeature.AVX512BW); ! assert kdst.getRegisterCategory().equals(MASK) && nds.getRegisterCategory().equals(XMM); evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0); emitByte(0x74); emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); } } --- 3551,3832 ---- public final void vmovdqu(Register dst, AMD64Address src) { VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src); } + public final void vmovdqu(AMD64Address dst, Register src) { + assert inRC(XMM, src); + VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src); + } + public final void vpmovzxbw(Register dst, AMD64Address src) { + assert supports(CPUFeature.AVX2); VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src); } public final void vzeroupper() { ! emitVEX(L128, P_, M_0F, W0, 0, 0, true); emitByte(0x77); } + // Insn: KORTESTD k1, k2 + + // This instruction produces ZF or CF flags + public final void kortestd(Register src1, Register src2) { + assert supports(CPUFeature.AVX512BW); + assert inRC(MASK, src1) && inRC(MASK, src2); + // Code: VEX.L0.66.0F.W1 98 /r + vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true); + emitByte(0x98); + emitModRM(src1, src2); + } + + // Insn: KORTESTQ k1, k2 + // This instruction produces ZF or CF flags public final void kortestq(Register src1, Register src2) { assert supports(CPUFeature.AVX512BW); ! assert inRC(MASK, src1) && inRC(MASK, src2); ! // Code: VEX.L0.0F.W1 98 /r ! vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true); emitByte(0x98); emitModRM(src1, src2); } + public final void kmovd(Register dst, Register src) { + assert supports(CPUFeature.AVX512BW); + assert inRC(MASK, dst) || inRC(CPU, dst); + assert inRC(MASK, src) || inRC(CPU, src); + assert !(inRC(CPU, dst) && inRC(CPU, src)); + + if (inRC(MASK, dst)) { + if (inRC(MASK, src)) { + // kmovd(KRegister dst, KRegister src): + // Insn: KMOVD k1, k2/m32 + // Code: VEX.L0.66.0F.W1 90 /r + vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true); + emitByte(0x90); + emitModRM(dst, src); + } else { + // kmovd(KRegister dst, Register src) + // Insn: KMOVD k1, r32 + // Code: VEX.L0.F2.0F.W0 92 /r + vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true); + emitByte(0x92); + emitModRM(dst, src); + } + } else { + if (inRC(MASK, src)) { + // kmovd(Register dst, KRegister src) + // Insn: KMOVD r32, k1 + // Code: VEX.L0.F2.0F.W0 93 /r + vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true); + emitByte(0x93); + emitModRM(dst, src); + } else { + throw GraalError.shouldNotReachHere(); + } + } + } + public final void kmovq(Register dst, Register src) { assert supports(CPUFeature.AVX512BW); ! assert inRC(MASK, dst) || inRC(CPU, dst); ! assert inRC(MASK, src) || inRC(CPU, src); ! assert !(inRC(CPU, dst) && inRC(CPU, src)); ! ! if (inRC(MASK, dst)) { ! if (inRC(MASK, src)) { ! // kmovq(KRegister dst, KRegister src): ! // Insn: KMOVQ k1, k2/m64 ! // Code: VEX.L0.0F.W1 90 /r ! vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true); emitByte(0x90); emitModRM(dst, src); } else { // kmovq(KRegister dst, Register src) ! // Insn: KMOVQ k1, r64 ! // Code: VEX.L0.F2.0F.W1 92 /r ! vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true); emitByte(0x92); emitModRM(dst, src); } } else { ! if (inRC(MASK, src)) { // kmovq(Register dst, KRegister src) ! // Insn: KMOVQ r64, k1 ! // Code: VEX.L0.F2.0F.W1 93 /r ! vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true); emitByte(0x93); emitModRM(dst, src); } else { throw GraalError.shouldNotReachHere(); } } } + // Insn: KTESTD k1, k2 + + public final void ktestd(Register src1, Register src2) { + assert supports(CPUFeature.AVX512BW); + assert inRC(MASK, src1) && inRC(MASK, src2); + // Code: VEX.L0.66.0F.W1 99 /r + vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true); + emitByte(0x99); + emitModRM(src1, src2); + } + public final void evmovdqu64(Register dst, AMD64Address src) { assert supports(CPUFeature.AVX512F); ! assert inRC(XMM, dst); evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0); emitByte(0x6F); emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); } + // Insn: VPMOVZXBW zmm1, m256 + public final void evpmovzxbw(Register dst, AMD64Address src) { assert supports(CPUFeature.AVX512BW); ! assert inRC(XMM, dst); ! // Code: EVEX.512.66.0F38.WIG 30 /r evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0); emitByte(0x30); emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); } public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) { assert supports(CPUFeature.AVX512BW); ! assert inRC(MASK, kdst) && inRC(XMM, nds); evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0); emitByte(0x74); emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); } + + // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512 + // ----- + // Insn: VMOVDQU16 zmm1, m512 + + public final void evmovdqu16(Register dst, AMD64Address src) { + assert supports(CPUFeature.AVX512BW); + assert inRC(XMM, dst); + // Code: EVEX.512.F2.0F.W1 6F /r + evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0); + emitByte(0x6F); + emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); + } + + // Insn: VMOVDQU16 zmm1, k1:z, m512 + + public final void evmovdqu16(Register dst, Register mask, AMD64Address src) { + assert supports(CPUFeature.AVX512BW); + assert inRC(XMM, dst) && inRC(MASK, mask); + // Code: EVEX.512.F2.0F.W1 6F /r + evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0); + emitByte(0x6F); + emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); + } + + // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1 + // ----- + // Insn: VMOVDQU16 m512, zmm1 + + public final void evmovdqu16(AMD64Address dst, Register src) { + assert supports(CPUFeature.AVX512BW); + assert inRC(XMM, src); + // Code: EVEX.512.F2.0F.W1 7F /r + evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0); + emitByte(0x7F); + emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); + } + + // Insn: VMOVDQU16 m512, k1, zmm1 + + public final void evmovdqu16(AMD64Address dst, Register mask, Register src) { + assert supports(CPUFeature.AVX512BW); + assert inRC(MASK, mask) && inRC(XMM, src); + // Code: EVEX.512.F2.0F.W1 7F /r + evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0); + emitByte(0x7F); + emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); + } + + // Insn: VPBROADCASTW zmm1 {k1}{z}, reg + // ----- + // Insn: VPBROADCASTW zmm1, reg + + public final void evpbroadcastw(Register dst, Register src) { + assert supports(CPUFeature.AVX512BW); + assert inRC(XMM, dst) && inRC(CPU, src); + // Code: EVEX.512.66.0F38.W0 7B /r + evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0); + emitByte(0x7B); + emitModRM(dst, src); + } + + // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8 + // ----- + // Insn: VPCMPUW k1, zmm2, zmm3, imm8 + + public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) { + assert supports(CPUFeature.AVX512BW); + assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src); + // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib + evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0); + emitByte(0x3E); + emitModRM(kdst, src); + emitByte(vcc); + } + + // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8 + // ----- + // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8 + + public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) { + assert supports(CPUFeature.AVX512BW); + assert inRC(MASK, kdst) && inRC(MASK, mask); + assert inRC(XMM, nds) && inRC(XMM, src); + // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib + evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0); + emitByte(0x3E); + emitModRM(kdst, src); + emitByte(vcc); + } + + // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2 + // ----- + // Insn: VPMOVWB m256, zmm2 + + public final void evpmovwb(AMD64Address dst, Register src) { + assert supports(CPUFeature.AVX512BW); + assert inRC(XMM, src); + // Code: EVEX.512.F3.0F38.W0 30 /r + evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0); + emitByte(0x30); + emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); + } + + // Insn: VPMOVWB m256, k1, zmm2 + + public final void evpmovwb(AMD64Address dst, Register mask, Register src) { + assert supports(CPUFeature.AVX512BW); + assert inRC(MASK, mask) && inRC(XMM, src); + // Code: EVEX.512.F3.0F38.W0 30 /r + evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0); + emitByte(0x30); + emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); + } + + // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256 + // ----- + // Insn: VPMOVZXBW zmm1, k1, m256 + + public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) { + assert supports(CPUFeature.AVX512BW); + assert inRC(MASK, mask) && inRC(XMM, dst); + // Code: EVEX.512.66.0F38.WIG 30 /r + evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0); + emitByte(0x30); + emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); + } + }
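Taken together, the new AVX-512BW helpers support a masked compare-then-narrow pattern of the kind used by string-compression stubs. A hedged end-to-end sketch: srcAddr and dstAddr are hypothetical caller-prepared AMD64Address values, the register choices are illustrative, and immediate 1 in VPCMPUW encodes unsigned less-than:

    asm.evpbroadcastw(AMD64.xmm1, AMD64.rcx);          // splat a 16-bit bound into all 32 word lanes of zmm1
    asm.evmovdqu16(AMD64.xmm0, srcAddr);               // load 32 chars into zmm0
    asm.evpcmpuw(AMD64.k2, AMD64.xmm0, AMD64.xmm1, 1); // k2[i] = word i of zmm0 < bound (unsigned)
    asm.evpmovwb(dstAddr, AMD64.k2, AMD64.xmm0);       // masked truncating store of the low bytes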