--- old/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXArithmetic.java 2013-04-05 19:13:30.000000000 -0400
+++ new/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXArithmetic.java 2013-04-05 19:13:30.000000000 -0400
@@ -30,15 +30,18 @@
 import com.oracle.graal.asm.ptx.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.lir.*;
+import com.oracle.graal.lir.LIRInstruction.Def;
+import com.oracle.graal.lir.LIRInstruction.Opcode;
+import com.oracle.graal.lir.LIRInstruction.Use;
 import com.oracle.graal.lir.asm.*;
 
 // @formatter:off
 public enum PTXArithmetic {
     IADD, ISUB, IMUL, IDIV, IDIVREM, IREM, IUDIV, IUREM, IAND, IOR, IXOR, ISHL, ISHR, IUSHR,
     LADD, LSUB, LMUL, LDIV, LDIVREM, LREM, LUDIV, LUREM, LAND, LOR, LXOR, LSHL, LSHR, LUSHR,
-    FADD, FSUB, FMUL, FDIV, FAND, FOR, FXOR,
-    DADD, DSUB, DMUL, DDIV, DAND, DOR, DXOR,
-    INEG, LNEG,
+    FADD, FSUB, FMUL, FDIV, FREM, FAND, FOR, FXOR,
+    DADD, DSUB, DMUL, DDIV, DREM, DAND, DOR, DXOR,
+    INEG, LNEG, FNEG, DNEG,
     I2L, L2I, I2B, I2C, I2S,
     F2D, D2F,
     I2F, I2D, F2I, D2I,
@@ -46,6 +49,47 @@
     MOV_I2F, MOV_L2D, MOV_F2I, MOV_D2L;
 
+    /**
+     * Unary operation with separate source and destination operand.
+     */
+    public static class Unary2Op extends PTXLIRInstruction {
+        @Opcode private final PTXArithmetic opcode;
+        @Def({REG}) protected AllocatableValue result;
+        @Use({REG, STACK}) protected AllocatableValue x;
+
+        public Unary2Op(PTXArithmetic opcode, AllocatableValue result, AllocatableValue x) {
+            this.opcode = opcode;
+            this.result = result;
+            this.x = x;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
+            PTXMove.move(tasm, masm, result, x);
+            emit(tasm, masm, opcode, result, x, null);
+        }
+    }
+
+    /**
+     * Unary operation with single operand for source and destination.
+     */
+    public static class Unary1Op extends PTXLIRInstruction {
+        @Opcode private final PTXArithmetic opcode;
+        @Def({REG, HINT}) protected AllocatableValue result;
+        @Use({REG, STACK}) protected AllocatableValue x;
+
+        public Unary1Op(PTXArithmetic opcode, AllocatableValue result, AllocatableValue x) {
+            this.opcode = opcode;
+            this.result = result;
+            this.x = x;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
+            emit(tasm, masm, opcode, result);
+        }
+    }
+
     public static class Op1Reg extends PTXLIRInstruction {
         @Opcode private final PTXArithmetic opcode;
         @Def({REG, HINT}) protected Value result;
@@ -213,11 +257,13 @@
         }
     }
-
-    @SuppressWarnings("unused")
     protected static void emit(TargetMethodAssembler tasm, PTXAssembler masm, PTXArithmetic opcode, Value result) {
         switch (opcode) {
-            default: throw GraalInternalError.shouldNotReachHere();
+            case L2I: masm.cvt_s32_s64(asLongReg(result), asIntReg(result)); break;
+            case I2C: masm.cvt_b16_s32(asIntReg(result), asIntReg(result)); break;
+            default:
+                System.err.println("missing: " + opcode);
+                throw GraalInternalError.shouldNotReachHere();
         }
     }
 
@@ -227,8 +273,22 @@
             Register a = asIntReg(src);
             Register d = asIntReg(dst);
             switch (opcode) {
-                case INEG: masm.neg_s32(d, a); break;
+                case INEG: masm.neg_s32(d, a); break;
+                case FNEG: masm.neg_f32(d, a); break;
+                case DNEG: masm.neg_f64(d, a); break;
+                case I2L: masm.cvt_s64_s32(d, a); break;
+                case I2C: masm.cvt_b16_s32(d, a); break;
+                case I2B: masm.cvt_s8_s32(d, a); break;
+                case I2F: masm.cvt_f32_s32(d, a); break;
+                case I2D: masm.cvt_f64_s32(d, a); break;
+                case F2I: masm.cvt_s32_f32(d, a); break;
+                case F2L: masm.cvt_s64_f32(d, a); break;
+                case F2D: masm.cvt_f64_f32(d, a); break;
+                case D2I: masm.cvt_s32_f64(d, a); break;
+                case D2L: masm.cvt_s64_f64(d, a); break;
+                case D2F: masm.cvt_f32_f64(d, a); break;
                 default:
+                    System.err.println("missing: " + opcode);
                     throw GraalInternalError.shouldNotReachHere();
             }
         } else if (isConstant(src)) {
@@ -252,33 +312,75 @@
     public static void emit(TargetMethodAssembler tasm, PTXAssembler masm, PTXArithmetic opcode, Value dst, Value src1, Value src2, LIRFrameState info) {
         int exceptionOffset = -1;
         if (isConstant(src1)) {
-            int a = tasm.asIntConst(src1);
-            Register b = asIntReg(src2);
-            Register d = asIntReg(dst);
             switch (opcode) {
-                case ISUB: masm.sub_s32(d, a, b); break;
-                case IAND: masm.and_b32(d, b, a); break;
-                default: throw GraalInternalError.shouldNotReachHere();
+                case ISUB: masm.sub_s32(asIntReg(dst), tasm.asIntConst(src1), asIntReg(src2)); break;
+                case IAND: masm.and_b32(asIntReg(dst), asIntReg(src2), tasm.asIntConst(src1)); break;
+                case IDIV: masm.div_s32(asIntReg(dst), tasm.asIntConst(src1), asIntReg(src2)); break;
+                case FSUB: masm.sub_f32(asFloatReg(dst), tasm.asFloatConst(src1), asFloatReg(src2)); break;
+                case FDIV: masm.div_f32(asFloatReg(dst), tasm.asFloatConst(src1), asFloatReg(src2)); break;
+                case DSUB: masm.sub_f64(asDoubleReg(dst), tasm.asDoubleConst(src1), asDoubleReg(src2)); break;
+                case DDIV: masm.div_f64(asDoubleReg(dst), tasm.asDoubleConst(src1), asDoubleReg(src2)); break;
+                default:
+                    throw GraalInternalError.shouldNotReachHere();
             }
         } else if (isConstant(src2)) {
-            Register a = asIntReg(src1);
-            int b = tasm.asIntConst(src2);
-            Register d = asIntReg(dst);
             switch (opcode) {
-                case IADD: masm.add_s32(d, a, b); break;
-                case IAND: masm.and_b32(d, a, b); break;
-                case IUSHR: masm.shr_u32(d, a, b); break;
-                default: throw GraalInternalError.shouldNotReachHere();
+                case IADD: masm.add_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break;
+                case ISUB: masm.sub_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break;
+                case IMUL: masm.mul_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break;
+                case IAND: masm.and_b32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break;
+                case ISHL: masm.shl_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break;
+                case ISHR: masm.shr_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break;
+                case IUSHR: masm.shr_u32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break;
+                case IXOR: masm.xor_b32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break;
+                case LXOR: masm.xor_b64(asLongReg(dst), asLongReg(src1), tasm.asLongConst(src2)); break;
+                case LUSHR: masm.shr_u64(asLongReg(dst), asLongReg(src1), tasm.asLongConst(src2)); break;
+                case FADD: masm.add_f32(asFloatReg(dst), asFloatReg(src1), tasm.asFloatConst(src2)); break;
+                case FMUL: masm.mul_f32(asFloatReg(dst), asFloatReg(src1), tasm.asFloatConst(src2)); break;
+                case FDIV: masm.div_f32(asFloatReg(dst), asFloatReg(src1), tasm.asFloatConst(src2)); break;
+                case DADD: masm.add_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
+                case DMUL: masm.mul_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
+                case DDIV: masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
+                default:
+                    throw GraalInternalError.shouldNotReachHere();
            }
         } else {
-            Register a = asIntReg(src1);
-            Register b = asIntReg(src2);
-            Register d = asIntReg(dst);
             switch (opcode) {
-                case IADD: masm.add_s32(d, a, b); break;
-                case ISUB: masm.sub_s32(d, a, b); break;
-                case IMUL: masm.mul_s32(d, a, b); break;
-                default: throw GraalInternalError.shouldNotReachHere();
+                case IADD: masm.add_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case ISUB: masm.sub_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case IMUL: masm.mul_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case IDIV: masm.div_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case IAND: masm.and_b32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case IOR: masm.or_b32 (asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case IXOR: masm.xor_b32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case ISHL: masm.shl_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case ISHR: masm.shr_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case IUSHR: masm.shr_u32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case IREM: masm.rem_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break;
+                case LADD: masm.add_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LSUB: masm.sub_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LMUL: masm.mul_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LDIV: masm.div_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LAND: masm.and_b64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LOR: masm.or_b64 (asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LXOR: masm.xor_b64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LSHL: masm.shl_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LSHR: masm.shr_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LUSHR: masm.shr_u64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case LREM: masm.rem_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break;
+                case FADD: masm.add_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break;
+                case FSUB: masm.sub_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break;
+                case FMUL: masm.mul_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break;
+                case FDIV: masm.div_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break;
+                case FREM: masm.div_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break;
+                case DADD: masm.add_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+                case DSUB: masm.sub_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+                case DMUL: masm.mul_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+                case DDIV: masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+                case DREM: masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+                default:
+                    System.err.println("missing: " + opcode);
+                    throw GraalInternalError.shouldNotReachHere();
             }
         }
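
Usage sketch (not part of the patch): the two new LIR instruction classes take (opcode, result, source) exactly as their constructors above show, so a PTX LIR generator could lower an int-to-float conversion roughly as below. The helpers newVariable, append and asAllocatable are assumptions about the surrounding Graal LIRGenerator API of this era, and 'input' is a hypothetical previously computed Value; treat all of them as placeholders if the actual API differs.

    // Hypothetical sketch inside a PTXLIRGenerator-style class, not taken from the patch.
    Variable result = newVariable(Kind.Float);                    // fresh f32 destination (assumed helper)
    append(new PTXArithmetic.Unary2Op(PTXArithmetic.I2F,          // conversion opcode handled by emit(..., dst, src, info)
                                      result,                     // @Def({REG}) operand
                                      asAllocatable(input)));      // @Use({REG, STACK}) operand (assumed helper)
    // Unary1Op is the in-place variant: it passes only 'result' to
    // emit(tasm, masm, opcode, result), covering cases such as L2I and I2C.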