graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXArithmetic.java

rev 8592 : Graal PTX enhancements

@@ -28,26 +28,70 @@
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.asm.ptx.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.lir.*;
+import com.oracle.graal.lir.LIRInstruction.Def;
+import com.oracle.graal.lir.LIRInstruction.Opcode;
+import com.oracle.graal.lir.LIRInstruction.Use;
 import com.oracle.graal.lir.asm.*;
 
 // @formatter:off
 public enum PTXArithmetic {
     IADD, ISUB, IMUL, IDIV, IDIVREM, IREM, IUDIV, IUREM, IAND, IOR, IXOR, ISHL, ISHR, IUSHR,
     LADD, LSUB, LMUL, LDIV, LDIVREM, LREM, LUDIV, LUREM, LAND, LOR, LXOR, LSHL, LSHR, LUSHR,
-    FADD, FSUB, FMUL, FDIV, FAND, FOR, FXOR,
-    DADD, DSUB, DMUL, DDIV, DAND, DOR, DXOR,
-    INEG, LNEG,
+    FADD, FSUB, FMUL, FDIV, FREM, FAND, FOR, FXOR,
+    DADD, DSUB, DMUL, DDIV, DREM, DAND, DOR, DXOR,
+    INEG, LNEG, FNEG, DNEG,
     I2L, L2I, I2B, I2C, I2S,
     F2D, D2F,
     I2F, I2D, F2I, D2I,
     L2F, L2D, F2L, D2L,
     MOV_I2F, MOV_L2D, MOV_F2I, MOV_D2L;
 
 
+    /**
+     * Unary operation with separate source and destination operands.
+     */
+    public static class Unary2Op extends PTXLIRInstruction {
+        @Opcode private final PTXArithmetic opcode;
+        @Def({REG}) protected AllocatableValue result;
+        @Use({REG, STACK}) protected AllocatableValue x;
+
+        public Unary2Op(PTXArithmetic opcode, AllocatableValue result, AllocatableValue x) {
+            this.opcode = opcode;
+            this.result = result;
+            this.x = x;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
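+            // Move the input into the result register first, then emit the operation with
+            // result as destination and x as source.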
+            PTXMove.move(tasm, masm, result, x);
+            emit(tasm, masm, opcode, result, x, null);
+        }
+    }
+
+    /**
+     * Unary operation with a single operand serving as both source and destination.
+     */
+    public static class Unary1Op extends PTXLIRInstruction {
+        @Opcode private final PTXArithmetic opcode;
+        @Def({REG, HINT}) protected AllocatableValue result;
+        @Use({REG, STACK}) protected AllocatableValue x;
+
+        public Unary1Op(PTXArithmetic opcode, AllocatableValue result, AllocatableValue x) {
+            this.opcode = opcode;
+            this.result = result;
+            this.x = x;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
+            emit(tasm, masm, opcode, result);
+        }
+    }
+
     public static class Op1Reg extends PTXLIRInstruction {
         @Opcode private final PTXArithmetic opcode;
         @Def({REG, HINT}) protected Value result;
         @Use({REG}) protected Value x;
 

@@ -211,26 +255,42 @@
             super.verify();
             verifyKind(opcode, result, x, y);
         }
     }
 
-
-    @SuppressWarnings("unused")
     protected static void emit(TargetMethodAssembler tasm, PTXAssembler masm, PTXArithmetic opcode, Value result) {
         switch (opcode) {
-            default:   throw GraalInternalError.shouldNotReachHere();
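+            // Single-operand form: result serves as both source and destination.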
+            case L2I:  masm.cvt_s32_s64(asIntReg(result), asLongReg(result)); break;
+            case I2C:  masm.cvt_b16_s32(asIntReg(result), asIntReg(result)); break;
+            default:
+                System.err.println("missing: " + opcode);
+                throw GraalInternalError.shouldNotReachHere();
         }
     }
 
     public static void emit(TargetMethodAssembler tasm, PTXAssembler masm, PTXArithmetic opcode, Value dst, Value src, LIRFrameState info) {
         int exceptionOffset = -1;
         if (isRegister(src)) {
             Register a = asIntReg(src);
             Register d = asIntReg(dst);
             switch (opcode) {
                 case INEG: masm.neg_s32(d, a); break;
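+                // Note: a and d come from asIntReg above, so the float, double and long
+                // conversions below assume those accessors also work for non-int operands.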
+                case FNEG: masm.neg_f32(d, a);     break;
+                case DNEG: masm.neg_f64(d, a);     break;
+                case I2L:  masm.cvt_s64_s32(d, a); break;
+                case I2C:  masm.cvt_b16_s32(d, a); break;
+                case I2B:  masm.cvt_s8_s32(d, a);  break;
+                case I2F:  masm.cvt_f32_s32(d, a); break;
+                case I2D:  masm.cvt_f64_s32(d, a); break;
+                case F2I:  masm.cvt_s32_f32(d, a); break;
+                case F2L:  masm.cvt_s64_f32(d, a); break;
+                case F2D:  masm.cvt_f64_f32(d, a); break;
+                case D2I:  masm.cvt_s32_f64(d, a); break;
+                case D2L:  masm.cvt_s64_f64(d, a); break;
+                case D2F:  masm.cvt_f32_f64(d, a); break;
                 default:
+                    System.err.println("missing: " + opcode);
                     throw GraalInternalError.shouldNotReachHere();
             }
         } else if (isConstant(src)) {
             switch (opcode) {
                 case ISUB: masm.sub_s32(asIntReg(dst), asIntReg(dst), tasm.asIntConst(src)); break;

@@ -250,37 +310,79 @@
     }
 
     public static void emit(TargetMethodAssembler tasm, PTXAssembler masm, PTXArithmetic opcode, Value dst, Value src1, Value src2, LIRFrameState info) {
         int exceptionOffset = -1;
         if (isConstant(src1)) {
-            int      a = tasm.asIntConst(src1);
-            Register b = asIntReg(src2);
-            Register d = asIntReg(dst);
             switch (opcode) {
-            case ISUB:  masm.sub_s32(d, a, b); break;
-            case IAND:  masm.and_b32(d, b, a); break;
-            default:    throw GraalInternalError.shouldNotReachHere();
+            case ISUB:  masm.sub_s32(asIntReg(dst),    tasm.asIntConst(src1),    asIntReg(src2));         break;
+            case IAND:  masm.and_b32(asIntReg(dst),    asIntReg(src2),           tasm.asIntConst(src1));  break;
+            case IDIV:  masm.div_s32(asIntReg(dst),    tasm.asIntConst(src1),    asIntReg(src2));         break;
+            case FSUB:  masm.sub_f32(asFloatReg(dst),  tasm.asFloatConst(src1),  asFloatReg(src2));       break;
+            case FDIV:  masm.div_f32(asFloatReg(dst),  tasm.asFloatConst(src1),  asFloatReg(src2));       break;
+            case DSUB:  masm.sub_f64(asDoubleReg(dst), tasm.asDoubleConst(src1), asDoubleReg(src2));      break;
+            case DDIV:  masm.div_f64(asDoubleReg(dst), tasm.asDoubleConst(src1), asDoubleReg(src2));      break;
+            default:
+                throw GraalInternalError.shouldNotReachHere();
             }
         } else if (isConstant(src2)) {
-            Register a = asIntReg(src1);
-            int      b = tasm.asIntConst(src2);
-            Register d = asIntReg(dst);
             switch (opcode) {
-            case IADD:  masm.add_s32(d, a, b); break;
-            case IAND:  masm.and_b32(d, a, b); break;
-            case IUSHR: masm.shr_u32(d, a, b); break;
-            default:    throw GraalInternalError.shouldNotReachHere();
+            case IADD:  masm.add_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case ISUB:  masm.sub_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case IMUL:  masm.mul_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case IAND:  masm.and_b32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case ISHL:  masm.shl_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case ISHR:  masm.shr_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case IUSHR: masm.shr_u32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case IXOR:  masm.xor_b32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case LXOR:  masm.xor_b64(asLongReg(dst),   asLongReg(src1),   tasm.asLongConst(src2));   break;
+            case LUSHR: masm.shr_u64(asLongReg(dst),   asLongReg(src1),   tasm.asLongConst(src2));   break;
+            case FADD:  masm.add_f32(asFloatReg(dst),  asFloatReg(src1),  tasm.asFloatConst(src2));  break;
+            case FMUL:  masm.mul_f32(asFloatReg(dst),  asFloatReg(src1),  tasm.asFloatConst(src2));  break;
+            case FDIV:  masm.div_f32(asFloatReg(dst),  asFloatReg(src1),  tasm.asFloatConst(src2));  break;
+            case DADD:  masm.add_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
+            case DMUL:  masm.mul_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
+            case DDIV:  masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
+            default:
+                throw GraalInternalError.shouldNotReachHere();
             }
         } else {
-            Register a = asIntReg(src1);
-            Register b = asIntReg(src2);
-            Register d = asIntReg(dst);
             switch (opcode) {
-            case IADD:  masm.add_s32(d, a, b); break;
-            case ISUB:  masm.sub_s32(d, a, b); break;
-            case IMUL:  masm.mul_s32(d, a, b); break;
-            default:    throw GraalInternalError.shouldNotReachHere();
+            case IADD:  masm.add_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case ISUB:  masm.sub_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case IMUL:  masm.mul_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case IDIV:  masm.div_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case IAND:  masm.and_b32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case IOR:   masm.or_b32 (asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case IXOR:  masm.xor_b32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case ISHL:  masm.shl_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case ISHR:  masm.shr_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case IUSHR: masm.shr_u32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case IREM:  masm.rem_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case LADD:  masm.add_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LSUB:  masm.sub_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LMUL:  masm.mul_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LDIV:  masm.div_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LAND:  masm.and_b64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LOR:   masm.or_b64 (asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LXOR:  masm.xor_b64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LSHL:  masm.shl_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LSHR:  masm.shr_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LUSHR: masm.shr_u64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LREM:  masm.rem_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case FADD:  masm.add_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
+            case FSUB:  masm.sub_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
+            case FMUL:  masm.mul_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
+            case FDIV:  masm.div_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
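+            // FREM and DREM below currently emit div_f32/div_f64; PTX has no floating-point
+            // rem instruction, so a true remainder would need an additional instruction sequence.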
+            case FREM:  masm.div_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
+            case DADD:  masm.add_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+            case DSUB:  masm.sub_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+            case DMUL:  masm.mul_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+            case DDIV:  masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+            case DREM:  masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+            default:
+                System.err.println("missing: " + opcode);
+                throw GraalInternalError.shouldNotReachHere();
             }
         }
 
         if (info != null) {
             assert exceptionOffset != -1;