graal-webrev Sdiff graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail

graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java

  68 import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp;
  69 import com.oracle.graal.nodes.*;
  70 import com.oracle.graal.nodes.StructuredGraph.GuardsStage;
  71 import com.oracle.graal.nodes.extended.*;
  72 import com.oracle.graal.nodes.java.*;
  73 import com.oracle.graal.nodes.spi.*;
  74 import com.oracle.graal.nodes.virtual.*;
  75 import com.oracle.graal.options.*;
  76 import com.oracle.graal.phases.*;
  77 import com.oracle.graal.phases.tiers.*;
  78 import com.oracle.graal.virtual.nodes.*;
  79 
  80 /**
  81  * HSAIL specific backend.
  82  */
  83 public class HSAILHotSpotBackend extends HotSpotBackend {
  84 
  85     public static class Options {
             // Option holder for this backend. HsailDonorThreads defaults to 4; the kernel prologue
             // emitted further down asserts it is positive and, when it is not 1, uses it as the
             // rem_u32 divisor that maps a workitem id to a donor thread.
  86 
  87         // @formatter:off
  88         @Option(help = "Number of donor threads for HSAIL kernel dispatch")
  89         static public final OptionValue<Integer> HsailDonorThreads = new OptionValue<>(4);
  90         // @formatter:on
  91     }
  92 
  93     private Map<String, String> paramTypeMap = new HashMap<>(); // filled by the constructor: type-name string -> "s32"/"f32"/"f64"/"s64"
         // Set once in the constructor from OkraUtil.okraLibExists() && initialize(); executeKernel throws when it is false.
  94     private final boolean deviceInitialized;
  95     // TODO: get maximum Concurrency from okra
  96     private int maxDeoptIndex = 8 * 40 * 64;   // see gpu_hsail.hpp
  97 
  98     public HSAILHotSpotBackend(HotSpotGraalRuntime runtime, HotSpotProviders providers) {
  99         super(runtime, providers);
 100         paramTypeMap.put("HotSpotResolvedPrimitiveType<int>", "s32");
 101         paramTypeMap.put("HotSpotResolvedPrimitiveType<float>", "f32");
 102         paramTypeMap.put("HotSpotResolvedPrimitiveType<double>", "f64");
 103         paramTypeMap.put("HotSpotResolvedPrimitiveType<long>", "s64");
 104 
 105         /*
 106          * The order of the conjunction below is important: the OkraUtil call may provision the
 107          * native library required by the initialize() call
 108          */
 109         deviceInitialized = OkraUtil.okraLibExists() && initialize();

 352         if (hostCode.getAssumptions() != null) {
 353             for (Assumption assumption : hostCode.getAssumptions().getAssumptions()) {
 354                 if (assumption != null) {
 355                     mergedAssumptions.record(assumption);
 356                 }
 357             }
 358         }
 359         if (hsailCode.getAssumptions() != null) {
 360             for (Assumption assumption : hsailCode.getAssumptions().getAssumptions()) {
 361                 if (assumption != null) {
 362                     mergedAssumptions.record(assumption);
 363                 }
 364             }
 365         }
 366         if (!mergedAssumptions.isEmpty()) {
 367             result.setAssumptions(mergedAssumptions);
 368         }
 369         return result;
 370     }
 371 
 372     private static final ThreadLocal<DonorThreadPool> donorThreadPool = new ThreadLocal<DonorThreadPool>() {
             // Per-thread holder: the first get() on a given thread runs initialValue() below, so each
             // thread that dispatches an allocating kernel (see executeKernel) gets its own
             // DonorThreadPool instance.
 373         @Override
 374         protected DonorThreadPool initialValue() {
 375             return new DonorThreadPool();
 376         }
 377     };
 378 
 379     public boolean executeKernel(HotSpotInstalledCode kernel, int jobSize, Object[] args) throws InvalidInstalledCodeException {
             /*
              * Dispatches an installed kernel through the native executeKernel0. kernel, jobSize and
              * args are forwarded unchanged, and the result is whatever executeKernel0 returns (the
              * meaning of that boolean is defined on the native side and is not visible here).
              * Throws GraalInternalError when deviceInitialized is false.
              */
 380         if (!deviceInitialized) {
 381             throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized");
 382         }
             // kernel is cast to HSAILHotSpotNmethod here and again below; any other type fails with
             // a ClassCastException.
 383         int[] oopMapArray = ((HSAILHotSpotNmethod) kernel).getOopMapArray();
 384 
 385         // Pass donorThreadPoolArray if this kernel uses allocation, otherwise null
             // (the array is taken from the calling thread's DonorThreadPool, see donorThreadPool)
 386         Thread[] donorThreadArray = ((HSAILHotSpotNmethod) kernel).getUsesAllocationFlag() ? donorThreadPool.get().getThreads() : null;
 387         return executeKernel0(kernel, jobSize, args, donorThreadArray, HsailAllocBytesPerWorkitem.getValue(), oopMapArray);
 388     }
 389 
         /**
          * Native counterpart of {@link #executeKernel}; it has no Java body. The only caller
          * visible here passes {@code donorThreads} as {@code null} for kernels that do not
          * allocate, the current value of {@code HsailAllocBytesPerWorkitem} as
          * {@code allocBytesPerWorkitem}, and the kernel's oop map array as {@code oopMapArray}.
          */
 390     private static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args, Thread[] donorThreads, int allocBytesPerWorkitem, int[] oopMapArray)
 391                     throws InvalidInstalledCodeException;
 392 
 393     /**
 394      * Use the HSAIL register set when the compilation target is HSAIL.
          *
          * @param registerConfig register configuration handed to the new frame map
          * @return a new {@code HSAILFrameMap} built from {@code getCodeCache()} and
          *         {@code registerConfig}
 395      */
 396     @Override
 397     public FrameMap newFrameMap(RegisterConfig registerConfig) {
 398         return new HSAILFrameMap(getCodeCache(), registerConfig);
 399     }
 400 
 401     @Override
 402     public LIRGeneratorTool newLIRGenerator(CallingConvention cc, LIRGenerationResult lirGenRes) {
             // Creates a new HSAILHotSpotLIRGenerator on every call, built from this backend's
             // providers, the runtime's config, and the two arguments passed through unchanged.
 403         return new HSAILHotSpotLIRGenerator(getProviders(), getRuntime().getConfig(), cc, lirGenRes);
 404     }
 405 
 406     @Override
 407     public LIRGenerationResult newLIRGenerationResult(LIR lir, FrameMap frameMap, ResolvedJavaMethod method, Object stub) {
             // Only lir and frameMap are passed on; method and stub are not used by this implementation.
 408         return new HSAILHotSpotLIRGenerationResult(lir, frameMap);
 409     }
 410

 616          * loaded up front but will be loaded as needed.
 617          */
 618         for (int i = 0; i < nonConstantParamCount; i++) {
 619             asm.emitString("ld_kernarg_" + paramHsailSizes[i] + "  " + HSAIL.mapRegister(cc.getArgument(i)) + ", [" + paramNames[i] + "];");
 620         }
 621 
 622         /*
 623          * Emit the workitemabsid instruction for loading the hidden gid parameter. This is assigned
 624          * the register as if it were the last of the nonConstant parameters.
 625          */
 626         String workItemReg = "$s" + Integer.toString(asRegister(cc.getArgument(nonConstantParamCount)).encoding());
 627         asm.emitString("workitemabsid_u32 " + workItemReg + ", 0;");
 628 
 629         final String deoptInProgressLabel = "@LHandleDeoptInProgress";
 630 
 631         if (useHSAILDeoptimization) {
 632             // Aliases for d16
 633             RegisterValue d16_deoptInfo = HSAIL.d16.asValue(wordLIRKind);
 634 
 635             // Aliases for d17
 636             RegisterValue d17_donorThreadIndex = HSAIL.d17.asValue(wordLIRKind);
 637             RegisterValue d17_safepointFlagAddrIndex = d17_donorThreadIndex;
 638 
 639             // Aliases for s34
 640             RegisterValue s34_deoptOccurred = HSAIL.s34.asValue(LIRKind.value(Kind.Int));
 641             RegisterValue s34_donorThreadIndex = s34_deoptOccurred;
 642 
 643             asm.emitLoadKernelArg(d16_deoptInfo, asm.getDeoptInfoName(), "u64");
 644             asm.emitComment("// Check if a deopt or safepoint has occurred and abort if true before doing any work");
 645 
 646             if (useHSAILSafepoints) {
 647                 // Load address of _notice_safepoints field
 648                 asm.emitLoad(wordKind, d17_safepointFlagAddrIndex, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailNoticeSafepointsOffset).toAddress());
 649                 // Load int value from that field
 650                 asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(wordLIRKind, d17_safepointFlagAddrIndex, 0).toAddress());
 651                 asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
 652                 asm.cbr(deoptInProgressLabel);
 653             }
 654             asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailDeoptOccurredOffset).toAddress());
 655             asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
 656             asm.cbr(deoptInProgressLabel);
 657             // load thread register if this kernel performs allocation
 658             if (usesAllocation) {
 659                 RegisterValue threadReg = getProviders().getRegisters().getThreadRegister().asValue(wordLIRKind);
 660                 assert HsailDonorThreads.getValue() > 0;
 661                 asm.emitLoad(wordKind, threadReg, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailCurTlabInfoOffset).toAddress());
 662                 if (HsailDonorThreads.getValue() != 1) {
 663                     asm.emitComment("// map workitem to a donor thread");
 664                     asm.emitString(String.format("rem_u32  $%s, %s, %d;", s34_donorThreadIndex.getRegister(), workItemReg, HsailDonorThreads.getValue()));
 665                     asm.emitConvert(d17_donorThreadIndex, s34_donorThreadIndex, wordKind, Kind.Int);
 666                     asm.emit("mad", threadReg, d17_donorThreadIndex, Constant.forInt(8), threadReg);
 667                 } else {
 668                     // workitem is already mapped to solitary donor thread
 669                 }
 670                 asm.emitComment("// $" + getProviders().getRegisters().getThreadRegister() + " will point to holder of tlab thread info for this workitem");
 671             }
 672         }
 673 
 674         /*
 675          * Note the logic used for this spillseg size is to leave space and then go back and patch
 676          * in the correct size once we have generated all the instructions. This should probably be
 677          * done in a more robust way by implementing something like asm.insertString.
 678          */
 679         int spillsegDeclarationPosition = asm.position() + 1;
 680         String spillsegTemplate = "align 4 spill_u8 %spillseg[123456];";
 681         asm.emitString(spillsegTemplate);
 682         // Emit object array load prologue here.
 683         if (isObjectLambda) {
 684             boolean useCompressedOops = config.useCompressedOops;
 685             final int arrayElementsOffset = HotSpotGraalRuntime.getArrayBaseOffset(wordKind);
 686             String iterationObjArgReg = HSAIL.mapRegister(cc.getArgument(nonConstantParamCount - 1));
 687             /*
 688              * iterationObjArgReg will be the highest $d register in use (it is the last parameter)

  68 import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp;
  69 import com.oracle.graal.nodes.*;
  70 import com.oracle.graal.nodes.StructuredGraph.GuardsStage;
  71 import com.oracle.graal.nodes.extended.*;
  72 import com.oracle.graal.nodes.java.*;
  73 import com.oracle.graal.nodes.spi.*;
  74 import com.oracle.graal.nodes.virtual.*;
  75 import com.oracle.graal.options.*;
  76 import com.oracle.graal.phases.*;
  77 import com.oracle.graal.phases.tiers.*;
  78 import com.oracle.graal.virtual.nodes.*;
  79 
  80 /**
  81  * HSAIL specific backend.
  82  */
  83 public class HSAILHotSpotBackend extends HotSpotBackend {
  84 
  85     public static class Options {
             // Option holder for this backend. HsailKernelTlabs defaults to 4; executeKernel passes
             // its value to the native side for kernels that allocate, and the kernel prologue
             // emitted further down asserts it is positive and, when it is not 1, uses it as the
             // rem_u32 divisor that maps a workitem id to a tlab.
  86 
  87         // @formatter:off
  88         @Option(help = "Number of TLABs used for HSAIL kernels which allocate")
  89         static public final OptionValue<Integer> HsailKernelTlabs = new OptionValue<>(4);
  90         // @formatter:on
  91     }
  92 
  93     private Map<String, String> paramTypeMap = new HashMap<>(); // filled by the constructor: type-name string -> "s32"/"f32"/"f64"/"s64"
         // Set once in the constructor from OkraUtil.okraLibExists() && initialize(); executeKernel throws when it is false.
  94     private final boolean deviceInitialized;
  95     // TODO: get maximum Concurrency from okra
  96     private int maxDeoptIndex = 8 * 40 * 64;   // see gpu_hsail.hpp
  97 
  98     public HSAILHotSpotBackend(HotSpotGraalRuntime runtime, HotSpotProviders providers) {
  99         super(runtime, providers);
 100         paramTypeMap.put("HotSpotResolvedPrimitiveType<int>", "s32");
 101         paramTypeMap.put("HotSpotResolvedPrimitiveType<float>", "f32");
 102         paramTypeMap.put("HotSpotResolvedPrimitiveType<double>", "f64");
 103         paramTypeMap.put("HotSpotResolvedPrimitiveType<long>", "s64");
 104 
 105         /*
 106          * The order of the conjunction below is important: the OkraUtil call may provision the
 107          * native library required by the initialize() call
 108          */
 109         deviceInitialized = OkraUtil.okraLibExists() && initialize();

 352         if (hostCode.getAssumptions() != null) {
 353             for (Assumption assumption : hostCode.getAssumptions().getAssumptions()) {
 354                 if (assumption != null) {
 355                     mergedAssumptions.record(assumption);
 356                 }
 357             }
 358         }
 359         if (hsailCode.getAssumptions() != null) {
 360             for (Assumption assumption : hsailCode.getAssumptions().getAssumptions()) {
 361                 if (assumption != null) {
 362                     mergedAssumptions.record(assumption);
 363                 }
 364             }
 365         }
 366         if (!mergedAssumptions.isEmpty()) {
 367             result.setAssumptions(mergedAssumptions);
 368         }
 369         return result;
 370     }
 371 







 372     public boolean executeKernel(HotSpotInstalledCode kernel, int jobSize, Object[] args) throws InvalidInstalledCodeException {
             /*
              * Dispatches an installed kernel through the native executeKernel0. kernel, jobSize and
              * args are forwarded unchanged, and the result is whatever executeKernel0 returns (the
              * meaning of that boolean is defined on the native side and is not visible here).
              * Throws GraalInternalError when deviceInitialized is false.
              */
 373         if (!deviceInitialized) {
 374             throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized");
 375         }
             // kernel is cast to HSAILHotSpotNmethod here and again below; any other type fails with
             // a ClassCastException.
 376         int[] oopMapArray = ((HSAILHotSpotNmethod) kernel).getOopMapArray();
 377 
 378         // Pass HsailKernelTlabs number if this kernel uses allocation, otherwise 0
 379         int numTlabs = ((HSAILHotSpotNmethod) kernel).getUsesAllocationFlag() ? HsailKernelTlabs.getValue() : 0;
 380         return executeKernel0(kernel, jobSize, args, numTlabs, HsailAllocBytesPerWorkitem.getValue(), oopMapArray);
 381     }
 382 
         /**
          * Native counterpart of {@link #executeKernel}; it has no Java body. The only caller
          * visible here passes {@code numTlabs} as 0 for kernels that do not allocate, the current
          * value of {@code HsailAllocBytesPerWorkitem} as {@code allocBytesPerWorkitem}, and the
          * kernel's oop map array as {@code oopMapArray}.
          */
 383     private static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args, int numTlabs, int allocBytesPerWorkitem, int[] oopMapArray)
 384                     throws InvalidInstalledCodeException;
 385 
 386     /**
 387      * Use the HSAIL register set when the compilation target is HSAIL.
          *
          * @param registerConfig register configuration handed to the new frame map
          * @return a new {@code HSAILFrameMap} built from {@code getCodeCache()} and
          *         {@code registerConfig}
 388      */
 389     @Override
 390     public FrameMap newFrameMap(RegisterConfig registerConfig) {
 391         return new HSAILFrameMap(getCodeCache(), registerConfig);
 392     }
 393 
 394     @Override
 395     public LIRGeneratorTool newLIRGenerator(CallingConvention cc, LIRGenerationResult lirGenRes) {
             // Creates a new HSAILHotSpotLIRGenerator on every call, built from this backend's
             // providers, the runtime's config, and the two arguments passed through unchanged.
 396         return new HSAILHotSpotLIRGenerator(getProviders(), getRuntime().getConfig(), cc, lirGenRes);
 397     }
 398 
 399     @Override
 400     public LIRGenerationResult newLIRGenerationResult(LIR lir, FrameMap frameMap, ResolvedJavaMethod method, Object stub) {
             // Only lir and frameMap are passed on; method and stub are not used by this implementation.
 401         return new HSAILHotSpotLIRGenerationResult(lir, frameMap);
 402     }
 403

 609          * loaded up front but will be loaded as needed.
 610          */
 611         for (int i = 0; i < nonConstantParamCount; i++) {
 612             asm.emitString("ld_kernarg_" + paramHsailSizes[i] + "  " + HSAIL.mapRegister(cc.getArgument(i)) + ", [" + paramNames[i] + "];");
 613         }
 614 
 615         /*
 616          * Emit the workitemabsid instruction for loading the hidden gid parameter. This is assigned
 617          * the register as if it were the last of the nonConstant parameters.
 618          */
 619         String workItemReg = "$s" + Integer.toString(asRegister(cc.getArgument(nonConstantParamCount)).encoding());
 620         asm.emitString("workitemabsid_u32 " + workItemReg + ", 0;");
 621 
 622         final String deoptInProgressLabel = "@LHandleDeoptInProgress";
 623 
 624         if (useHSAILDeoptimization) {
 625             // Aliases for d16
 626             RegisterValue d16_deoptInfo = HSAIL.d16.asValue(wordLIRKind);
 627 
 628             // Aliases for d17
 629             RegisterValue d17_tlabIndex = HSAIL.d17.asValue(wordLIRKind);
 630             RegisterValue d17_safepointFlagAddrIndex = d17_tlabIndex;
 631 
 632             // Aliases for s34
 633             RegisterValue s34_deoptOccurred = HSAIL.s34.asValue(LIRKind.value(Kind.Int));
 634             RegisterValue s34_tlabIndex = s34_deoptOccurred;
 635 
 636             asm.emitLoadKernelArg(d16_deoptInfo, asm.getDeoptInfoName(), "u64");
 637             asm.emitComment("// Check if a deopt or safepoint has occurred and abort if true before doing any work");
 638 
 639             if (useHSAILSafepoints) {
 640                 // Load address of _notice_safepoints field
 641                 asm.emitLoad(wordKind, d17_safepointFlagAddrIndex, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailNoticeSafepointsOffset).toAddress());
 642                 // Load int value from that field
 643                 asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(wordLIRKind, d17_safepointFlagAddrIndex, 0).toAddress());
 644                 asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
 645                 asm.cbr(deoptInProgressLabel);
 646             }
 647             asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailDeoptOccurredOffset).toAddress());
 648             asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
 649             asm.cbr(deoptInProgressLabel);
 650             // load thread register if this kernel performs allocation
 651             if (usesAllocation) {
 652                 RegisterValue threadReg = getProviders().getRegisters().getThreadRegister().asValue(wordLIRKind);
 653                 assert HsailKernelTlabs.getValue() > 0;
 654                 asm.emitLoad(wordKind, threadReg, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailCurTlabInfoOffset).toAddress());
 655                 if (HsailKernelTlabs.getValue() != 1) {
 656                     asm.emitComment("// map workitem to a tlab");
 657                     asm.emitString(String.format("rem_u32  $%s, %s, %d;", s34_tlabIndex.getRegister(), workItemReg, HsailKernelTlabs.getValue()));
 658                     asm.emitConvert(d17_tlabIndex, s34_tlabIndex, wordKind, Kind.Int);
 659                     asm.emit("mad", threadReg, d17_tlabIndex, Constant.forInt(8), threadReg);
 660                 } else {
 661                     // workitem is already mapped to solitary tlab
 662                 }
 663                 asm.emitComment("// $" + getProviders().getRegisters().getThreadRegister() + " will point to holder of tlab thread info for this workitem");
 664             }
 665         }
 666 
 667         /*
 668          * Note the logic used for this spillseg size is to leave space and then go back and patch
 669          * in the correct size once we have generated all the instructions. This should probably be
 670          * done in a more robust way by implementing something like asm.insertString.
 671          */
 672         int spillsegDeclarationPosition = asm.position() + 1;
 673         String spillsegTemplate = "align 4 spill_u8 %spillseg[123456];";
 674         asm.emitString(spillsegTemplate);
 675         // Emit object array load prologue here.
 676         if (isObjectLambda) {
 677             boolean useCompressedOops = config.useCompressedOops;
 678             final int arrayElementsOffset = HotSpotGraalRuntime.getArrayBaseOffset(wordKind);
 679             String iterationObjArgReg = HSAIL.mapRegister(cc.getArgument(nonConstantParamCount - 1));
 680             /*
 681              * iterationObjArgReg will be the highest $d register in use (it is the last parameter)