1 /*
   2  * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package com.oracle.graal.hotspot.hsail;
  24 
  25 import static com.oracle.graal.api.code.CallingConvention.Type.*;
  26 import static com.oracle.graal.api.code.CodeUtil.*;
  27 import static com.oracle.graal.api.code.ValueUtil.*;
  28 import static com.oracle.graal.api.meta.LocationIdentity.*;
  29 import static com.oracle.graal.compiler.GraalCompiler.*;
  30 import static com.oracle.graal.hotspot.hsail.HSAILHotSpotBackend.Options.*;
  31 import static com.oracle.graal.hotspot.hsail.replacements.HSAILNewObjectSnippets.Options.*;
  32 
  33 import java.lang.reflect.*;
  34 import java.util.*;
  35 import java.util.Map.Entry;
  36 import java.util.function.*;
  37 import java.util.stream.*;
  38 
  39 import com.amd.okra.*;
  40 import com.oracle.graal.api.code.*;
  41 import com.oracle.graal.api.code.Assumptions.Assumption;
  42 import com.oracle.graal.api.code.CallingConvention.Type;
  43 import com.oracle.graal.api.code.CompilationResult.Call;
  44 import com.oracle.graal.api.code.CompilationResult.CodeAnnotation;
  45 import com.oracle.graal.api.code.CompilationResult.DataPatch;
  46 import com.oracle.graal.api.code.CompilationResult.ExceptionHandler;
  47 import com.oracle.graal.api.code.CompilationResult.Infopoint;
  48 import com.oracle.graal.api.code.CompilationResult.Mark;
  49 import com.oracle.graal.api.meta.*;
  50 import com.oracle.graal.asm.*;
  51 import com.oracle.graal.asm.hsail.*;
  52 import com.oracle.graal.compiler.common.*;
  53 import com.oracle.graal.compiler.common.cfg.*;
  54 import com.oracle.graal.compiler.common.type.*;
  55 import com.oracle.graal.debug.*;
  56 import com.oracle.graal.debug.Debug.Scope;
  57 import com.oracle.graal.gpu.*;
  58 import com.oracle.graal.hotspot.*;
  59 import com.oracle.graal.hotspot.bridge.CompilerToVM.CodeInstallResult;
  60 import com.oracle.graal.hotspot.meta.*;
  61 import com.oracle.graal.hotspot.nodes.*;
  62 import com.oracle.graal.hsail.*;
  63 import com.oracle.graal.java.*;
  64 import com.oracle.graal.lir.*;
  65 import com.oracle.graal.lir.asm.*;
  66 import com.oracle.graal.lir.gen.*;
  67 import com.oracle.graal.lir.hsail.*;
  68 import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp;
  69 import com.oracle.graal.nodes.*;
  70 import com.oracle.graal.nodes.StructuredGraph.GuardsStage;
  71 import com.oracle.graal.nodes.extended.*;
  72 import com.oracle.graal.nodes.java.*;
  73 import com.oracle.graal.nodes.spi.*;
  74 import com.oracle.graal.nodes.virtual.*;
  75 import com.oracle.graal.options.*;
  76 import com.oracle.graal.phases.*;
  77 import com.oracle.graal.phases.tiers.*;
  78 import com.oracle.graal.virtual.nodes.*;
  79 
  80 /**
  81  * HSAIL specific backend.
  82  */
  83 public class HSAILHotSpotBackend extends HotSpotBackend {
  84 
  85     public static class Options {
  86 
  87         // @formatter:off
  88         @Option(help = "Number of donor threads for HSAIL kernel dispatch")
  89         static public final OptionValue<Integer> HsailDonorThreads = new OptionValue<>(4);
  90         // @formatter:on
  91     }
  92 
    // Maps the string form of a parameter's Java type to the HSAIL type suffix used when emitting
    // kernel arguments (e.g. "s32"); filled in by the constructor.
    private Map<String, String> paramTypeMap = new HashMap<>();
    // Outcome of the device initialization attempted once in the constructor.
    private final boolean deviceInitialized;
    // TODO: get maximum concurrency from okra
    private int maxDeoptIndex = 8 * 40 * 64;   // see gpu_hsail.hpp
  97 
  98     public HSAILHotSpotBackend(HotSpotGraalRuntime runtime, HotSpotProviders providers) {
  99         super(runtime, providers);
 100         paramTypeMap.put("HotSpotResolvedPrimitiveType<int>", "s32");
 101         paramTypeMap.put("HotSpotResolvedPrimitiveType<float>", "f32");
 102         paramTypeMap.put("HotSpotResolvedPrimitiveType<double>", "f64");
 103         paramTypeMap.put("HotSpotResolvedPrimitiveType<long>", "s64");
 104 
 105         /*
 106          * The order of the conjunction below is important: the OkraUtil call may provision the
 107          * native library required by the initialize() call
 108          */
 109         deviceInitialized = OkraUtil.okraLibExists() && initialize();
 110     }
 111 
    /**
     * Always answers {@code true} for the HSAIL backend.
     *
     * @return {@code true}
     */
    @Override
    public boolean shouldAllocateRegisters() {
        return true;
    }
 116 
    /**
     * Initializes the GPU device. This is a native method; it is invoked once from the
     * constructor, and only if {@code OkraUtil.okraLibExists()} returned {@code true}.
     *
     * @return whether or not initialization was successful
     */
    private static native boolean initialize();
 123 
    /**
     * Determines if the GPU device (or simulator) is available and initialized.
     *
     * @return the result of the initialization attempt made when this backend was constructed
     */
    public boolean isDeviceInitialized() {
        return deviceInitialized;
    }
 130 
 131     /**
 132      * Completes the initialization of the HSAIL backend. This includes initializing the providers
 133      * and registering any method substitutions specified by the HSAIL backend.
 134      */
 135     @Override
 136     public void completeInitialization() {
 137         final HotSpotProviders providers = getProviders();
 138         HotSpotVMConfig config = getRuntime().getConfig();
 139         // Initialize the lowering provider.
 140         final HotSpotLoweringProvider lowerer = (HotSpotLoweringProvider) providers.getLowerer();
 141         lowerer.initialize(providers, config);
 142 
 143         // Register the replacements used by the HSAIL backend.
 144         HSAILHotSpotReplacementsImpl replacements = (HSAILHotSpotReplacementsImpl) providers.getReplacements();
 145         replacements.completeInitialization();
 146     }
 147 
 148     /**
 149      * Compiles and installs a given method to a GPU binary.
 150      */
 151     public HotSpotNmethod compileAndInstallKernel(Method method) {
 152         ResolvedJavaMethod javaMethod = getProviders().getMetaAccess().lookupJavaMethod(method);
 153         HotSpotNmethod nm = installKernel(javaMethod, compileKernel(javaMethod, true));
 154         try (Scope s = Debug.scope("HostCodeGen")) {
 155             if (Debug.isLogEnabled()) {
 156                 DisassemblerProvider dis = getRuntime().getHostBackend().getDisassembler();
 157                 if (dis != null) {
 158                     String disasm = dis.disassemble(nm);
 159                     Debug.log("host code generated for %s%n%s", javaMethod, disasm);
 160                 } else {
 161                     Debug.log("host code disassembler is null");
 162                 }
 163             }
 164         } catch (Throwable e) {
 165             throw Debug.handle(e);
 166         }
 167         return nm;
 168     }
 169 
 170     /**
 171      * Compiles a given method to HSAIL code.
 172      *
 173      * @param makeBinary specifies whether a GPU binary should also be generated for the HSAIL code.
 174      *            If true, the returned value is guaranteed to have a non-zero
 175      *            {@linkplain ExternalCompilationResult#getEntryPoint() entry point}.
 176      * @return the HSAIL code compiled from {@code method}'s bytecode
 177      */
 178     public ExternalCompilationResult compileKernel(ResolvedJavaMethod method, boolean makeBinary) {
 179         StructuredGraph graph = new StructuredGraph(method);
 180         HotSpotProviders providers = getProviders();
 181         MetaAccessProvider metaAccess = getProviders().getMetaAccess();
 182 
 183         // changed this from default to help us generate deopts when needed
 184         OptimisticOptimizations optimisticOpts = OptimisticOptimizations.ALL;
 185         optimisticOpts.remove(OptimisticOptimizations.Optimization.UseExceptionProbabilityForOperations);
 186         new GraphBuilderPhase.Instance(metaAccess, GraphBuilderConfiguration.getSnippetDefault(), optimisticOpts).apply(graph);
 187         PhaseSuite<HighTierContext> graphBuilderSuite = providers.getSuites().getDefaultGraphBuilderSuite();
 188         CallingConvention cc = getCallingConvention(providers.getCodeCache(), Type.JavaCallee, graph.method(), false);
 189 
 190         // append special HSAILNonNullParametersPhase
 191         int numArgs = cc.getArguments().length;
 192         graphBuilderSuite.appendPhase(new HSAILNonNullParametersPhase(numArgs));
 193 
 194         Suites suites = providers.getSuites().getDefaultSuites();
 195         ExternalCompilationResult hsailCode = compileGraph(graph, null, cc, method, providers, this, this.getTarget(), null, graphBuilderSuite, optimisticOpts, getProfilingInfo(graph), null, suites,
 196                         new ExternalCompilationResult(), CompilationResultBuilderFactory.Default);
 197 
 198         // this code added to dump infopoints
 199         try (Scope s = Debug.scope("CodeGen")) {
 200             if (Debug.isLogEnabled()) {
 201                 // show infopoints
 202                 List<Infopoint> infoList = hsailCode.getInfopoints();
 203                 Debug.log("%d HSAIL infopoints", infoList.size());
 204                 for (Infopoint info : infoList) {
 205                     Debug.log(info.toString());
 206                     Debug.log(info.debugInfo.frame().toString());
 207                 }
 208             }
 209         } catch (Throwable e) {
 210             throw Debug.handle(e);
 211         }
 212 
 213         if (makeBinary) {
 214             if (!deviceInitialized) {
 215                 throw new GraalInternalError("Cannot generate GPU kernel if device is not initialized");
 216             }
 217             try (Scope ds = Debug.scope("GeneratingKernelBinary")) {
 218                 long kernel = generateKernel(hsailCode.getTargetCode(), method.getName());
 219                 if (kernel == 0) {
 220                     throw new GraalInternalError("Failed to compile HSAIL kernel");
 221                 }
 222                 hsailCode.setEntryPoint(kernel);
 223             } catch (Throwable e) {
 224                 throw Debug.handle(e);
 225             }
 226         }
 227         return hsailCode;
 228     }
 229 
 230     private static class HSAILNonNullParametersPhase extends Phase {
 231         // we use this to limit the stamping to exclude the final argument in an obj stream method
 232         private int numArgs;
 233 
 234         public HSAILNonNullParametersPhase(int numArgs) {
 235             this.numArgs = numArgs;
 236         }
 237 
 238         @Override
 239         protected void run(StructuredGraph graph) {
 240             int argCount = 0;
 241             Stamp nonNull = StampFactory.objectNonNull();
 242             for (ParameterNode param : graph.getNodes(ParameterNode.class)) {
 243                 argCount++;
 244                 if (argCount < numArgs && param.stamp() instanceof ObjectStamp) {
 245                     ObjectStamp paramStamp = (ObjectStamp) param.stamp();
 246                     param.setStamp(paramStamp.join(nonNull));
 247                 }
 248             }
 249         }
 250     }
 251 
    /**
     * Generates a GPU binary from HSAIL code. This is a native method; {@link #compileKernel}
     * treats a return value of 0 as a failed generation.
     *
     * @param hsailCode the HSAIL code to generate the binary from
     * @param name a name for the kernel ({@link #compileKernel} passes the method name)
     * @return a handle to the generated binary
     */
    static native long generateKernel(byte[] hsailCode, String name);
 256 
 257     /**
 258      * Installs the {@linkplain ExternalCompilationResult#getEntryPoint() GPU binary} associated
 259      * with some given HSAIL code in the code cache and returns a {@link HotSpotNmethod} handle to
 260      * the installed code.
 261      *
 262      * @param hsailCode HSAIL compilation result for which a GPU binary has been generated
 263      * @return a handle to the binary as installed in the HotSpot code cache
 264      */
 265     public final HotSpotNmethod installKernel(ResolvedJavaMethod method, ExternalCompilationResult hsailCode) {
 266         assert hsailCode.getEntryPoint() != 0L;
 267         // Code here based on HotSpotCodeCacheProvider.addExternalMethod().
 268         HotSpotResolvedJavaMethod javaMethod = (HotSpotResolvedJavaMethod) method;
 269         if (hsailCode.getId() == -1) {
 270             hsailCode.setId(javaMethod.allocateCompileId(hsailCode.getEntryBCI()));
 271         }
 272         CompilationResult compilationResult = hsailCode;
 273         StructuredGraph hostGraph = hsailCode.getHostGraph();
 274         if (hostGraph != null) {
 275             // TODO get rid of the unverified entry point in the host code
 276             try (Scope ds = Debug.scope("GeneratingHostGraph", new DebugDumpScope("HostGraph"))) {
 277                 HotSpotBackend hostBackend = getRuntime().getHostBackend();
 278                 JavaType[] parameterTypes = new JavaType[hostGraph.getNodes(ParameterNode.class).count()];
 279                 Debug.log("Param count: %d", parameterTypes.length);
 280                 for (int i = 0; i < parameterTypes.length; i++) {
 281                     ParameterNode parameter = hostGraph.getParameter(i);
 282                     Debug.log("Param [%d]=%s", i, parameter);
 283                     parameterTypes[i] = parameter.stamp().javaType(hostBackend.getProviders().getMetaAccess());
 284                     Debug.log(" %s", parameterTypes[i]);
 285                 }
 286                 CallingConvention cc = hostBackend.getProviders().getCodeCache().getRegisterConfig().getCallingConvention(Type.JavaCallee, method.getSignature().getReturnType(null), parameterTypes,
 287                                 hostBackend.getTarget(), false);
 288                 CompilationResult hostCode = compileGraph(hostGraph, null, cc, method, hostBackend.getProviders(), hostBackend, this.getTarget(), null,
 289                                 hostBackend.getProviders().getSuites().getDefaultGraphBuilderSuite(), OptimisticOptimizations.NONE, null, null,
 290                                 hostBackend.getProviders().getSuites().getDefaultSuites(), new CompilationResult(), CompilationResultBuilderFactory.Default);
 291                 compilationResult = merge(hostCode, hsailCode);
 292             } catch (Throwable e) {
 293                 throw Debug.handle(e);
 294             }
 295         }
 296 
 297         HSAILHotSpotNmethod code = new HSAILHotSpotNmethod(javaMethod, hsailCode.getName(), false, true);
 298         code.setOopMapArray(hsailCode.getOopMapArray());
 299         code.setUsesAllocationFlag(hsailCode.getUsesAllocationFlag());
 300         HotSpotCompiledNmethod compiled = new HotSpotCompiledNmethod(getTarget(), javaMethod, compilationResult);
 301         CodeInstallResult result = getRuntime().getCompilerToVM().installCode(compiled, code, null);
 302         if (result != CodeInstallResult.OK) {
 303             return null;
 304         }
 305         return code;
 306     }
 307 
 308     private static ExternalCompilationResult merge(CompilationResult hostCode, ExternalCompilationResult hsailCode) {
 309         ExternalCompilationResult result = new ExternalCompilationResult();
 310 
 311         // from hsail code
 312         result.setEntryPoint(hsailCode.getEntryPoint());
 313         result.setId(hsailCode.getId());
 314         result.setEntryBCI(hsailCode.getEntryBCI());
 315         assert hsailCode.getMarks().isEmpty();
 316         assert hsailCode.getExceptionHandlers().isEmpty();
 317         assert hsailCode.getDataReferences().isEmpty();
 318 
 319         // from host code
 320         result.setTotalFrameSize(hostCode.getTotalFrameSize());
 321         result.setCustomStackAreaOffset(hostCode.getCustomStackAreaOffset());
 322         result.setTargetCode(hostCode.getTargetCode(), hostCode.getTargetCodeSize());
 323         for (CodeAnnotation annotation : hostCode.getAnnotations()) {
 324             result.addAnnotation(annotation);
 325         }
 326         for (Mark mark : hostCode.getMarks()) {
 327             result.recordMark(mark.pcOffset, mark.id);
 328         }
 329         for (ExceptionHandler handler : hostCode.getExceptionHandlers()) {
 330             result.recordExceptionHandler(handler.pcOffset, handler.handlerPos);
 331         }
 332         for (DataPatch patch : hostCode.getDataReferences()) {
 333             if (patch.data != null) {
 334                 if (patch.inline) {
 335                     result.recordInlineData(patch.pcOffset, patch.data);
 336                 } else {
 337                     result.recordDataReference(patch.pcOffset, patch.data);
 338                 }
 339             }
 340         }
 341         for (Infopoint infopoint : hostCode.getInfopoints()) {
 342             if (infopoint instanceof Call) {
 343                 Call call = (Call) infopoint;
 344                 result.recordCall(call.pcOffset, call.size, call.target, call.debugInfo, call.direct);
 345             } else {
 346                 result.recordInfopoint(infopoint.pcOffset, infopoint.debugInfo, infopoint.reason);
 347             }
 348         }
 349 
 350         // merged
 351         Assumptions mergedAssumptions = new Assumptions(true);
 352         if (hostCode.getAssumptions() != null) {
 353             for (Assumption assumption : hostCode.getAssumptions().getAssumptions()) {
 354                 if (assumption != null) {
 355                     mergedAssumptions.record(assumption);
 356                 }
 357             }
 358         }
 359         if (hsailCode.getAssumptions() != null) {
 360             for (Assumption assumption : hsailCode.getAssumptions().getAssumptions()) {
 361                 if (assumption != null) {
 362                     mergedAssumptions.record(assumption);
 363                 }
 364             }
 365         }
 366         if (!mergedAssumptions.isEmpty()) {
 367             result.setAssumptions(mergedAssumptions);
 368         }
 369         return result;
 370     }
 371 
 372     private static final ThreadLocal<DonorThreadPool> donorThreadPool = new ThreadLocal<DonorThreadPool>() {
 373         @Override
 374         protected DonorThreadPool initialValue() {
 375             return new DonorThreadPool();
 376         }
 377     };
 378 
 379     public boolean executeKernel(HotSpotInstalledCode kernel, int jobSize, Object[] args) throws InvalidInstalledCodeException {
 380         if (!deviceInitialized) {
 381             throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized");
 382         }
 383         int[] oopMapArray = ((HSAILHotSpotNmethod) kernel).getOopMapArray();
 384 
 385         // Pass donorThreadPoolArray if this kernel uses allocation, otherwise null
 386         Thread[] donorThreadArray = ((HSAILHotSpotNmethod) kernel).getUsesAllocationFlag() ? donorThreadPool.get().getThreads() : null;
 387         return executeKernel0(kernel, jobSize, args, donorThreadArray, HsailAllocBytesPerWorkitem.getValue(), oopMapArray);
 388     }
 389 
    /**
     * Native method called by {@link #executeKernel(HotSpotInstalledCode, int, Object[])} to
     * dispatch a kernel.
     *
     * @param kernel the installed kernel
     * @param jobSize the job size supplied by the caller
     * @param args the kernel arguments supplied by the caller
     * @param donorThreads the donor threads, or {@code null} when the kernel does not use
     *            allocation
     * @param allocBytesPerWorkitem the value of the {@code HsailAllocBytesPerWorkitem} option
     * @param oopMapArray the oop map array stored with the installed kernel
     * @return a boolean result that {@code executeKernel} returns unchanged
     */
    private static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args, Thread[] donorThreads, int allocBytesPerWorkitem, int[] oopMapArray)
                    throws InvalidInstalledCodeException;
 392 
    /**
     * Creates the frame map for HSAIL compilations: an {@code HSAILFrameMap} built from this
     * backend's code cache and the given register configuration.
     *
     * @param registerConfig the register configuration passed on to the frame map
     * @return a new {@code HSAILFrameMap}
     */
    @Override
    public FrameMap newFrameMap(RegisterConfig registerConfig) {
        return new HSAILFrameMap(getCodeCache(), registerConfig);
    }
 400 
    /**
     * Creates the HSAIL LIR generator from this backend's providers, the VM configuration, the
     * given calling convention and the given LIR generation result.
     */
    @Override
    public LIRGeneratorTool newLIRGenerator(CallingConvention cc, LIRGenerationResult lirGenRes) {
        return new HSAILHotSpotLIRGenerator(getProviders(), getRuntime().getConfig(), cc, lirGenRes);
    }
 405 
    /**
     * Creates the HSAIL LIR generation result for the given LIR and frame map. The {@code method}
     * and {@code stub} arguments are not used here.
     */
    @Override
    public LIRGenerationResult newLIRGenerationResult(LIR lir, FrameMap frameMap, ResolvedJavaMethod method, Object stub) {
        return new HSAILHotSpotLIRGenerationResult(lir, frameMap);
    }
 410 
    /**
     * Creates the HSAIL node LIR builder for the given graph and LIR generator.
     */
    @Override
    public NodeLIRBuilderTool newNodeLIRBuilder(StructuredGraph graph, LIRGeneratorTool lirGen) {
        return new HSAILHotSpotNodeLIRBuilder(graph, lirGen);
    }
 415 
    /**
     * Frame context used when building HSAIL compilation results. It reports that a frame
     * exists, and its enter/leave hooks emit no code; they only write a debug log message.
     */
    class HotSpotFrameContext implements FrameContext {

        // Always reports that a frame is present.
        public boolean hasFrame() {
            return true;
        }

        // Nothing is emitted on frame entry.
        @Override
        public void enter(CompilationResultBuilder crb) {
            Debug.log("Nothing to do here");
        }

        // Nothing is emitted on frame exit.
        @Override
        public void leave(CompilationResultBuilder crb) {
            Debug.log("Nothing to do here");
        }
    }
 432 
    /**
     * A compilation result builder that additionally keeps a reference to the HSAIL LIR
     * generation result it was created with, so that code working with the builder can get back
     * to it.
     */
    static class HSAILCompilationResultBuilder extends CompilationResultBuilder {
        // The LIR generation result supplied at construction time.
        public HSAILHotSpotLIRGenerationResult lirGenRes;

        // All arguments except lirGenRes are passed straight to the superclass constructor.
        public HSAILCompilationResultBuilder(CodeCacheProvider codeCache, ForeignCallsProvider foreignCalls, FrameMap frameMap, Assembler asm, FrameContext frameContext,
                        CompilationResult compilationResult, HSAILHotSpotLIRGenerationResult lirGenRes) {
            super(codeCache, foreignCalls, frameMap, asm, frameContext, compilationResult);
            this.lirGenRes = lirGenRes;
        }
    }
 445 
 446     static class HSAILHotSpotNmethod extends HotSpotNmethod {
 447         private int[] oopMapArray;
 448         private boolean usesAllocation;
 449 
 450         HSAILHotSpotNmethod(HotSpotResolvedJavaMethod method, String name, boolean isDefault, boolean isExternal) {
 451             super(method, name, isDefault, isExternal);
 452         }
 453 
 454         void setOopMapArray(int[] array) {
 455             oopMapArray = array;
 456         }
 457 
 458         int[] getOopMapArray() {
 459             return oopMapArray;
 460         }
 461 
 462         public void setUsesAllocationFlag(boolean val) {
 463             usesAllocation = val;
 464         }
 465 
 466         public boolean getUsesAllocationFlag() {
 467             return usesAllocation;
 468         }
 469     }
 470 
    /**
     * Creates the HSAIL assembler for this backend's target. The {@code frameMap} argument is not
     * used here.
     */
    @Override
    protected Assembler createAssembler(FrameMap frameMap) {
        return new HSAILHotSpotAssembler(getTarget());
    }
 475 
 476     @Override
 477     public CompilationResultBuilder newCompilationResultBuilder(LIRGenerationResult lirGenRes, CompilationResult compilationResult, CompilationResultBuilderFactory factory) {
 478         FrameMap frameMap = lirGenRes.getFrameMap();
 479         Assembler masm = createAssembler(frameMap);
 480         HotSpotFrameContext frameContext = new HotSpotFrameContext();
 481         // save lirGen for later use by setHostGraph
 482         CompilationResultBuilder crb = new HSAILCompilationResultBuilder(getCodeCache(), getForeignCalls(), frameMap, masm, frameContext, compilationResult,
 483                         (HSAILHotSpotLIRGenerationResult) lirGenRes);
 484         crb.setTotalFrameSize(frameMap.totalFrameSize());
 485         return crb;
 486     }
 487 
 488     @Override
 489     public void emitCode(CompilationResultBuilder crb, LIR lir, ResolvedJavaMethod method) {
 490         assert method != null : lir + " is not associated with a method";
 491         Kind wordKind = getProviders().getCodeCache().getTarget().wordKind;
 492         LIRKind wordLIRKind = LIRKind.value(wordKind);
 493 
 494         HotSpotVMConfig config = getRuntime().getConfig();
 495         boolean useHSAILDeoptimization = config.useHSAILDeoptimization;
 496         boolean useHSAILSafepoints = config.useHSAILSafepoints;
 497 
 498         if ((useHSAILSafepoints == true) && (useHSAILDeoptimization == false)) {
 499             Debug.log("+UseHSAILSafepoints requires +UseHSAILDeoptimization");
 500         }
 501 
 502         /*
 503          * See what graph nodes we have to see if we are using the thread register. If not, we don't
 504          * have to emit the code that sets it up. Maybe there is a better way to do this?
 505          */
 506         boolean usesAllocation = false;
 507         search: for (AbstractBlock<?> b : lir.linearScanOrder()) {
 508             for (LIRInstruction op : lir.getLIRforBlock(b)) {
 509                 if ((op instanceof HSAILMove.LoadOp) && ((HSAILMove.LoadOp) op).usesThreadRegister()) {
 510                     usesAllocation = true;
 511                     assert useHSAILDeoptimization : "cannot use thread register if HSAIL deopt support is disabled";
 512                     break search;
 513                 }
 514             }
 515         }
 516         // save usesAllocation flag in ExternalCompilationResult
 517         ((ExternalCompilationResult) crb.compilationResult).setUsesAllocationFlag(usesAllocation);
 518 
 519         // Emit the prologue.
 520         HSAILAssembler asm = (HSAILAssembler) crb.asm;
 521         asm.emitString0("version 0:95: $full : $large;\n");
 522 
 523         Signature signature = method.getSignature();
 524         int sigParamCount = signature.getParameterCount(false);
 525         // We're subtracting 1 because we're not making the final gid as a parameter.
 526 
 527         int nonConstantParamCount = sigParamCount - 1;
 528         boolean isStatic = (method.isStatic());
 529         // Determine if this is an object lambda.
 530         boolean isObjectLambda = true;
 531 
 532         if (signature.getParameterType(nonConstantParamCount, null).getKind() == Kind.Int) {
 533             isObjectLambda = false;
 534         } else {
 535             // Add space for gid int reg.
 536             nonConstantParamCount++;
 537         }
 538 
 539         // If this is an instance method, include the "this" parameter
 540         if (!isStatic) {
 541             nonConstantParamCount++;
 542         }
 543         // Add in any "constant" parameters (currently none).
 544         int totalParamCount = nonConstantParamCount;
 545         JavaType[] paramtypes = new JavaType[totalParamCount];
 546         String[] paramNames = new String[totalParamCount];
 547         int pidx = 0;
 548         MetaAccessProvider metaAccess = getProviders().getMetaAccess();
 549         for (int i = 0; i < totalParamCount; i++) {
 550             if (i == 0 && !isStatic) {
 551                 paramtypes[i] = metaAccess.lookupJavaType(Object.class);
 552                 paramNames[i] = "%_this";
 553             } else if (i < nonConstantParamCount) {
 554                 if (isObjectLambda && (i == (nonConstantParamCount))) {
 555                     // Set up the gid register mapping.
 556                     paramtypes[i] = metaAccess.lookupJavaType(int.class);
 557                     paramNames[i] = "%_gid";
 558                 } else {
 559                     paramtypes[i] = signature.getParameterType(pidx++, null);
 560                     paramNames[i] = "%_arg" + i;
 561                 }
 562             }
 563         }
 564 
 565         asm.emitString0("// " + (isStatic ? "static" : "instance") + " method " + method + "\n");
 566         asm.emitString0("kernel &run ( \n");
 567 
 568         FrameMap frameMap = crb.frameMap;
 569         RegisterConfig regConfig = frameMap.registerConfig;
 570         // Build list of param types which does include the gid (for cc register mapping query).
 571         JavaType[] ccParamTypes = new JavaType[nonConstantParamCount + 1];
 572         // Include the gid.
 573         System.arraycopy(paramtypes, 0, ccParamTypes, 0, nonConstantParamCount);
 574 
 575         /*
 576          * Last entry is always int (its register gets used in the workitemabsid instruction). This
 577          * is true even for object stream lambdas.
 578          */
 579         if (sigParamCount > 0) {
 580             ccParamTypes[ccParamTypes.length - 1] = metaAccess.lookupJavaType(int.class);
 581         }
 582         CallingConvention cc = regConfig.getCallingConvention(JavaCallee, null, ccParamTypes, getTarget(), false);
 583 
 584         /**
 585          * Compute the hsail size mappings up to but not including the last non-constant parameter
 586          * (which is the gid).
 587          *
 588          */
 589         String[] paramHsailSizes = new String[totalParamCount];
 590         for (int i = 0; i < totalParamCount; i++) {
 591             String paramtypeStr = paramtypes[i].toString();
 592             String sizeStr = paramTypeMap.get(paramtypeStr);
 593             // Catch all for any unmapped paramtype that is u64 (address of an object).
 594             paramHsailSizes[i] = (sizeStr != null ? sizeStr : "u64");
 595         }
 596         // Emit the kernel function parameters.
 597         for (int i = 0; i < totalParamCount; i++) {
 598             String str = "align 8 kernarg_" + paramHsailSizes[i] + " " + paramNames[i];
 599 
 600             if (useHSAILDeoptimization || (i != totalParamCount - 1)) {
 601                 str += ",";
 602             }
 603             asm.emitString(str);
 604         }
 605 
 606         if (useHSAILDeoptimization) {
 607             // add in the deoptInfo parameter
 608             asm.emitString("kernarg_u64 " + asm.getDeoptInfoName());
 609         }
 610 
 611         asm.emitString(") {");
 612 
 613         /*
 614          * End of parameters start of prolog code. Emit the load instructions for loading of the
 615          * kernel non-constant parameters into registers. The constant class parameters will not be
 616          * loaded up front but will be loaded as needed.
 617          */
 618         for (int i = 0; i < nonConstantParamCount; i++) {
 619             asm.emitString("ld_kernarg_" + paramHsailSizes[i] + "  " + HSAIL.mapRegister(cc.getArgument(i)) + ", [" + paramNames[i] + "];");
 620         }
 621 
 622         /*
 623          * Emit the workitemaid instruction for loading the hidden gid parameter. This is assigned
 624          * the register as if it were the last of the nonConstant parameters.
 625          */
 626         String workItemReg = "$s" + Integer.toString(asRegister(cc.getArgument(nonConstantParamCount)).encoding());
 627         asm.emitString("workitemabsid_u32 " + workItemReg + ", 0;");
 628 
 629         final String deoptInProgressLabel = "@LHandleDeoptInProgress";
 630 
 631         if (useHSAILDeoptimization) {
 632             // Aliases for d16
 633             RegisterValue d16_deoptInfo = HSAIL.d16.asValue(wordLIRKind);
 634 
 635             // Aliases for d17
 636             RegisterValue d17_donorThreadIndex = HSAIL.d17.asValue(wordLIRKind);
 637             RegisterValue d17_safepointFlagAddrIndex = d17_donorThreadIndex;
 638 
 639             // Aliases for s34
 640             RegisterValue s34_deoptOccurred = HSAIL.s34.asValue(LIRKind.value(Kind.Int));
 641             RegisterValue s34_donorThreadIndex = s34_deoptOccurred;
 642 
 643             asm.emitLoadKernelArg(d16_deoptInfo, asm.getDeoptInfoName(), "u64");
 644             asm.emitComment("// Check if a deopt or safepoint has occurred and abort if true before doing any work");
 645 
 646             if (useHSAILSafepoints) {
 647                 // Load address of _notice_safepoints field
 648                 asm.emitLoad(wordKind, d17_safepointFlagAddrIndex, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailNoticeSafepointsOffset).toAddress());
 649                 // Load int value from that field
 650                 asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(wordLIRKind, d17_safepointFlagAddrIndex, 0).toAddress());
 651                 asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
 652                 asm.cbr(deoptInProgressLabel);
 653             }
 654             asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailDeoptOccurredOffset).toAddress());
 655             asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
 656             asm.cbr(deoptInProgressLabel);
 657             // load thread register if this kernel performs allocation
 658             if (usesAllocation) {
 659                 RegisterValue threadReg = getProviders().getRegisters().getThreadRegister().asValue(wordLIRKind);
 660                 assert HsailDonorThreads.getValue() > 0;
 661                 asm.emitLoad(wordKind, threadReg, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailCurTlabInfoOffset).toAddress());
 662                 if (HsailDonorThreads.getValue() != 1) {
 663                     asm.emitComment("// map workitem to a donor thread");
 664                     asm.emitString(String.format("rem_u32  $%s, %s, %d;", s34_donorThreadIndex.getRegister(), workItemReg, HsailDonorThreads.getValue()));
 665                     asm.emitConvert(d17_donorThreadIndex, s34_donorThreadIndex, wordKind, Kind.Int);
 666                     asm.emit("mad", threadReg, d17_donorThreadIndex, Constant.forInt(8), threadReg);
 667                 } else {
 668                     // workitem is already mapped to solitary donor thread
 669                 }
 670                 asm.emitComment("// $" + getProviders().getRegisters().getThreadRegister() + " will point to holder of tlab thread info for this workitem");
 671             }
 672         }
 673 
 674         /*
 675          * Note the logic used for this spillseg size is to leave space and then go back and patch
 676          * in the correct size once we have generated all the instructions. This should probably be
 677          * done in a more robust way by implementing something like asm.insertString.
 678          */
 679         int spillsegDeclarationPosition = asm.position() + 1;
 680         String spillsegTemplate = "align 4 spill_u8 %spillseg[123456];";
 681         asm.emitString(spillsegTemplate);
 682         // Emit object array load prologue here.
 683         if (isObjectLambda) {
 684             boolean useCompressedOops = config.useCompressedOops;
 685             final int arrayElementsOffset = HotSpotGraalRuntime.getArrayBaseOffset(wordKind);
 686             String iterationObjArgReg = HSAIL.mapRegister(cc.getArgument(nonConstantParamCount - 1));
 687             /*
 688              * iterationObjArgReg will be the highest $d register in use (it is the last parameter)
 689              * so tempReg can be the next higher $d register
 690              */
 691             String tmpReg = "$d" + (asRegister(cc.getArgument(nonConstantParamCount - 1)).encoding() + 1);
 692             // Convert gid to long.
 693             asm.emitString("cvt_u64_s32 " + tmpReg + ", " + workItemReg + "; // Convert gid to long");
 694             // Adjust index for sizeof ref. Where to pull this size from?
 695             asm.emitString("mul_u64 " + tmpReg + ", " + tmpReg + ", " + (useCompressedOops ? 4 : 8) + "; // Adjust index for sizeof ref");
 696             // Adjust for actual data start.
 697             asm.emitString("add_u64 " + tmpReg + ", " + tmpReg + ", " + arrayElementsOffset + "; // Adjust for actual elements data start");
 698             // Add to array ref ptr.
 699             asm.emitString("add_u64 " + tmpReg + ", " + tmpReg + ", " + iterationObjArgReg + "; // Add to array ref ptr");
 700             // Load the object into the parameter reg.
 701             if (useCompressedOops) {
 702 
 703                 // Load u32 into the d 64 reg since it will become an object address
 704                 asm.emitString("ld_global_u32 " + tmpReg + ", " + "[" + tmpReg + "]" + "; // Load compressed ptr from array");
 705 
 706                 long narrowOopBase = config.narrowOopBase;
 707                 long narrowOopShift = config.narrowOopShift;
 708 
 709                 if (narrowOopBase == 0 && narrowOopShift == 0) {
 710                     // No more calculation to do, mov to target register
 711                     asm.emitString("mov_b64 " + iterationObjArgReg + ", " + tmpReg + "; // no shift or base addition");
 712                 } else {
 713                     if (narrowOopBase == 0) {
 714                         asm.emitString("shl_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + narrowOopShift + "; // do narrowOopShift");
 715                     } else if (narrowOopShift == 0) {
 716                         // not sure if we ever get add with 0 shift but just in case
 717                         asm.emitString("cmp_eq_b1_u64  $c0, " + tmpReg + ", 0x0; // avoid add if compressed is null");
 718                         asm.emitString("add_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + narrowOopBase + "; // add narrowOopBase");
 719                         asm.emitString("cmov_b64 " + iterationObjArgReg + ", $c0, 0x0, " + iterationObjArgReg + "; // avoid add if compressed is null");
 720                     } else {
 721                         asm.emitString("cmp_eq_b1_u64  $c0, " + tmpReg + ", 0x0; // avoid shift-add if compressed is null");
 722                         asm.emitString("mad_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + (1 << narrowOopShift) + ", " + narrowOopBase + "; // shift and add narrowOopBase");
 723                         asm.emitString("cmov_b64 " + iterationObjArgReg + ", $c0, 0x0, " + iterationObjArgReg + "; // avoid shift-add if compressed is null");
 724                     }
 725                 }
 726 
 727             } else {
 728                 asm.emitString("ld_global_u64 " + iterationObjArgReg + ", " + "[" + tmpReg + "]" + "; // Load from array element into parameter reg");
 729             }
 730         }
 731         // Prologue done, Emit code for the LIR.
 732         crb.emit(lir);
 733         // Now that code is emitted go back and figure out what the upper Bound stack size was.
 734         long maxStackSize = ((HSAILAssembler) crb.asm).upperBoundStackSize();
 735         String spillsegStringFinal;
 736         if (maxStackSize == 0) {
 737             // If no spilling, get rid of spillseg declaration.
 738             char[] array = new char[spillsegTemplate.length()];
 739             Arrays.fill(array, ' ');
 740             spillsegStringFinal = new String(array);
 741         } else {
 742             spillsegStringFinal = spillsegTemplate.replace("123456", String.format("%6d", maxStackSize));
 743         }
 744         asm.emitString(spillsegStringFinal, spillsegDeclarationPosition);
 745         // Emit the epilogue.
 746 
 747         HSAILHotSpotLIRGenerationResult lirGenRes = ((HSAILCompilationResultBuilder) crb).lirGenRes;
 748 
 749         int numSRegs = 0;
 750         int numDRegs = 0;
 751         int numStackSlotBytes = 0;
 752         if (useHSAILDeoptimization) {
 753             /*
 754              * Get the union of registers and stack slots needed to be saved at the infopoints.
 755              * While doing this compute the highest register in each category.
 756              */
 757             HSAILHotSpotRegisterConfig hsailRegConfig = (HSAILHotSpotRegisterConfig) regConfig;
 758             Set<Register> infoUsedRegs = new TreeSet<>();
 759             Set<StackSlot> infoUsedStackSlots = new HashSet<>();
 760             List<Infopoint> infoList = crb.compilationResult.getInfopoints();
 761             Queue<Value[]> workList = new LinkedList<>();
 762             for (Infopoint info : infoList) {
 763                 BytecodeFrame frame = info.debugInfo.frame();
 764                 while (frame != null) {
 765                     workList.add(frame.values);
 766                     frame = frame.caller();
 767                 }
 768             }
 769             while (!workList.isEmpty()) {
 770                 Value[] values = workList.poll();
 771                 for (Value val : values) {
 772                     if (isLegal(val)) {
 773                         if (isRegister(val)) {
 774                             Register reg = asRegister(val);
 775                             infoUsedRegs.add(reg);
 776                             if (hsailRegConfig.isAllocatableSReg(reg)) {
 777                                 numSRegs = Math.max(numSRegs, reg.encoding + 1);
 778                             } else if (hsailRegConfig.isAllocatableDReg(reg)) {
 779                                 numDRegs = Math.max(numDRegs, reg.encoding + 1);
 780                             }
 781                         } else if (isStackSlot(val)) {
 782                             StackSlot slot = asStackSlot(val);
 783                             Kind slotKind = slot.getKind();
 784                             int slotSizeBytes = (slotKind.isObject() ? 8 : slotKind.getByteCount());
 785                             int slotOffsetMax = HSAIL.getStackOffsetStart(slot, slotSizeBytes * 8) + slotSizeBytes;
 786                             numStackSlotBytes = Math.max(numStackSlotBytes, slotOffsetMax);
 787                             infoUsedStackSlots.add(slot);
 788                         } else if (isVirtualObject(val)) {
 789                             workList.add(((VirtualObject) val).getValues());
 790                         } else {
 791                             assert isConstant(val) : "Unsupported value: " + val;
 792                         }
 793                     }
 794                 }
 795             }
 796 
 797             // round up numSRegs to even number so dregs start on aligned boundary
 798             numSRegs += (numSRegs & 1);
 799 
 800             // numStackSlots is the number of 8-byte locations used for stack variables
 801             int numStackSlots = (numStackSlotBytes + 7) / 8;
 802 
 803             final int offsetToDeoptSaveStates = config.hsailDeoptimizationInfoHeaderSize;
 804             final int bytesPerSaveArea = 4 * numSRegs + 8 * numDRegs + 8 * numStackSlots;
 805             final int sizeofKernelDeopt = config.hsailKernelDeoptimizationHeaderSize + config.hsailFrameHeaderSize + bytesPerSaveArea;
 806             final int offsetToNeverRanArray = config.hsailNeverRanArrayOffset;
 807             final int offsetToDeoptNextIndex = config.hsailDeoptNextIndexOffset;
 808             final int offsetToDeoptimizationWorkItem = config.hsailDeoptimizationWorkItem;
 809             final int offsetToDeoptimizationReason = config.hsailDeoptimizationReason;
 810             final int offsetToDeoptimizationFrame = config.hsailKernelDeoptimizationHeaderSize;
 811             final int offsetToFramePc = config.hsailFramePcOffset;
 812             final int offsetToNumSaves = config.hsailFrameNumSRegOffset;
 813             final int offsetToSaveArea = config.hsailFrameHeaderSize;
 814 
 815             AllocatableValue scratch64 = HSAIL.d16.asValue(wordLIRKind);
 816             AllocatableValue cuSaveAreaPtr = HSAIL.d17.asValue(wordLIRKind);
 817             AllocatableValue waveMathScratch1 = HSAIL.d18.asValue(wordLIRKind);
 818             AllocatableValue waveMathScratch2 = HSAIL.d19.asValue(wordLIRKind);
 819 
 820             AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(LIRKind.value(Kind.Int));
 821             AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(LIRKind.value(Kind.Int));
 822             AllocatableValue scratch32 = HSAIL.s34.asValue(LIRKind.value(Kind.Int));
 823             AllocatableValue workidreg = HSAIL.s35.asValue(LIRKind.value(Kind.Int));
 824 
 825             HSAILAddress deoptNextIndexAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToDeoptNextIndex).toAddress();
 826             HSAILAddress neverRanArrayAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToNeverRanArray).toAddress();
 827 
 828             // The just-started lanes that see the deopt flag will jump here
 829             asm.emitString0(deoptInProgressLabel + ":\n");
 830             asm.emitLoad(wordKind, waveMathScratch1, neverRanArrayAddr);
 831             asm.emitWorkItemAbsId(workidreg);
 832             asm.emitConvert(waveMathScratch2, workidreg, wordKind, Kind.Int);
 833             asm.emit("add", waveMathScratch1, waveMathScratch1, waveMathScratch2);
 834             HSAILAddress neverRanStoreAddr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, 0).toAddress();
 835             asm.emitStore(Kind.Byte, Constant.forInt(1), neverRanStoreAddr);
 836             asm.emitString("ret;");
 837 
 838             // The deoptimizing lanes will jump here
 839             asm.emitString0(asm.getDeoptLabelName() + ":\n");
 840             String labelExit = asm.getDeoptLabelName() + "_Exit";
 841 
 842             HSAILAddress deoptInfoAddr = new HSAILAddressValue(wordLIRKind, scratch64, config.hsailDeoptOccurredOffset).toAddress();
 843             asm.emitLoadKernelArg(scratch64, asm.getDeoptInfoName(), "u64");
 844 
 845             // Set deopt occurred flag
 846             asm.emitMov(Kind.Int, scratch32, Constant.forInt(1));
 847             asm.emitStoreRelease(scratch32, deoptInfoAddr);
 848 
 849             asm.emitComment("// Determine next deopt save slot");
 850             asm.emitAtomicAdd(scratch32, deoptNextIndexAddr, Constant.forInt(1));
            /*
             * scratch32 now holds the next index to use. Set an error condition if there is no
             * room in the save area.
             */
 854             asm.emitComment("// assert room to save deopt");
 855             asm.emitCompare(Kind.Int, scratch32, Constant.forInt(maxDeoptIndex), "lt", false, false);
 856             asm.cbr("@L_StoreDeopt");
 857             /*
 858              * if assert fails, store a guaranteed negative workitemid in top level deopt occurred
 859              * flag
 860              */
 861             asm.emitWorkItemAbsId(scratch32);
 862             asm.emit("mad", scratch32, scratch32, Constant.forInt(-1), Constant.forInt(-1));
 863             asm.emitStore(scratch32, deoptInfoAddr);
 864             asm.emitString("ret;");
 865 
 866             asm.emitString0("@L_StoreDeopt" + ":\n");
 867 
 868             // Store deopt for this workitem into its slot in the HSAILComputeUnitSaveStates array
 869 
 870             asm.emitComment("// Convert id's for ptr math");
 871             asm.emitConvert(cuSaveAreaPtr, scratch32, wordKind, Kind.Int);
 872             asm.emitComment("// multiply by sizeof KernelDeoptArea");
 873             asm.emit("mul", cuSaveAreaPtr, cuSaveAreaPtr, Constant.forInt(sizeofKernelDeopt));
 874             asm.emitComment("// Add computed offset to deoptInfoPtr base");
 875             asm.emit("add", cuSaveAreaPtr, cuSaveAreaPtr, scratch64);
 876             // Add offset to _deopt_save_states[0]
 877             asm.emit("add", scratch64, cuSaveAreaPtr, Constant.forInt(offsetToDeoptSaveStates));
 878 
 879             HSAILAddress workItemAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToDeoptimizationWorkItem).toAddress();
 880             HSAILAddress actionReasonStoreAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToDeoptimizationReason).toAddress();
 881 
 882             asm.emitComment("// Get _deopt_info._first_frame");
 883             asm.emit("add", waveMathScratch1, scratch64, Constant.forInt(offsetToDeoptimizationFrame));
            // Now waveMathScratch1 is the _deopt_info._first_frame
 885             HSAILAddress pcStoreAddr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, offsetToFramePc).toAddress();
 886             HSAILAddress regCountsAddr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, offsetToNumSaves).toAddress();
 887             asm.emitComment("// store deopting workitem");
 888             asm.emitWorkItemAbsId(scratch32);
 889             asm.emitStore(Kind.Int, scratch32, workItemAddr);
 890             asm.emitComment("// store actionAndReason");
 891             asm.emitStore(Kind.Int, actionAndReasonReg, actionReasonStoreAddr);
 892             asm.emitComment("// store PC");
 893             asm.emitStore(Kind.Int, codeBufferOffsetReg, pcStoreAddr);
 894 
 895             asm.emitComment("// store regCounts (" + numSRegs + " $s registers, " + numDRegs + " $d registers, " + numStackSlots + " stack slots)");
 896             asm.emitStore(Kind.Int, Constant.forInt(numSRegs + (numDRegs << 8) + (numStackSlots << 16)), regCountsAddr);
 897 
 898             /*
 899              * Loop thru the usedValues storing each of the registers that are used. We always store
 900              * in a fixed location, even if some registers are skipped.
 901              */
 902             asm.emitComment("// store used regs");
 903             for (Register reg : infoUsedRegs) {
 904                 if (hsailRegConfig.isAllocatableSReg(reg)) {
 905                     // 32 bit registers
 906                     Kind kind = Kind.Int;
 907                     int ofst = offsetToSaveArea + reg.encoding * 4;
 908                     HSAILAddress addr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, ofst).toAddress();
 909                     AllocatableValue regValue = reg.asValue(LIRKind.value(kind));
 910                     asm.emitStore(kind, regValue, addr);
 911                 } else if (hsailRegConfig.isAllocatableDReg(reg)) {
 912                     // 64 bit registers
 913                     Kind kind = Kind.Long;
                    // d reg ofst starts past the numSRegs 32-bit s registers
 915                     int ofst = offsetToSaveArea + (numSRegs * 4) + reg.encoding * 8;
 916                     HSAILAddress addr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, ofst).toAddress();
 917                     AllocatableValue regValue = reg.asValue(LIRKind.value(kind));
 918                     asm.emitStore(kind, regValue, addr);
 919                 } else {
 920                     throw GraalInternalError.unimplemented();
 921                 }
 922             }
 923 
 924             // loop thru the usedStackSlots creating instructions to save in the save area
 925             if (numStackSlotBytes > 0) {
 926                 asm.emitComment("// store stack slots (uses " + numStackSlotBytes + " bytes)");
 927                 for (StackSlot slot : infoUsedStackSlots) {
 928                     asm.emitComment("// store " + slot);
 929                     Kind kind = slot.getKind();
 930                     int sizeInBits = (kind.isObject() || kind.getByteCount() == 8 ? 64 : 32);
 931                     int ofst = offsetToSaveArea + (numSRegs * 4) + (numDRegs * 8) + HSAIL.getStackOffsetStart(slot, sizeInBits);
 932                     HSAILAddress addr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, ofst).toAddress();
 933                     if (sizeInBits == 64) {
 934                         asm.emitSpillLoad(kind, scratch64, slot);
 935                         asm.emitStore(kind, scratch64, addr);
 936                     } else {
 937                         asm.emitSpillLoad(kind, scratch32, slot);
 938                         asm.emitStore(kind, scratch32, addr);
 939                     }
 940                 }
 941             }
 942 
 943             asm.emitString0(labelExit + ":\n");
 944 
 945             // and emit the return
 946             crb.frameContext.leave(crb);
 947             asm.exit();
 948             // build the oopMap Array
 949             int[] oopMapArray = new OopMapArrayBuilder().build(infoList, numSRegs, numDRegs, numStackSlots, hsailRegConfig);
 950             ((ExternalCompilationResult) crb.compilationResult).setOopMapArray(oopMapArray);
 951         } else {
 952             // Deoptimization is explicitly off, so emit simple return
 953             asm.emitString0(asm.getDeoptLabelName() + ":\n");
 954             asm.emitComment("// No deoptimization");
 955             asm.emitString("ret;");
 956         }
 957 
 958         asm.emitString0("}; \n");
 959 
 960         ExternalCompilationResult compilationResult = (ExternalCompilationResult) crb.compilationResult;
 961         if (useHSAILDeoptimization) {
 962             compilationResult.setHostGraph(prepareHostGraph(method, lirGenRes.getDeopts(), getProviders(), config, numSRegs, numDRegs));
 963         }
 964     }
 965 
 966     private static class OopMapArrayBuilder {
 967         // oopMapArray struct
 968         // int bytesPerSaveArea; (not strictly part of oopsmap but convenient to put here)
 969         // int intsPerInfopoint;
 970         static final int SAVEAREACOUNTS_OFST = 0;
 971         static final int INTSPERINFOPOINT_OFST = 1;
 972         static final int HEADERSIZE = 2;
 973         // for each infopoint:
 974         // int deoptId
 975         // one or more ints of bits for the oopmap
 976 
 977         private int[] array;
 978         private int intsPerInfopoint;
 979 
 980         int[] build(List<Infopoint> infoList, int numSRegs, int numDRegs, int numStackSlots, HSAILHotSpotRegisterConfig hsailRegConfig) {
 981             /*
 982              * We are told that infoList is always sorted. Each infoPoint can have a different
 983              * oopMap. Since numStackSlots is the number of 8-byte stack slots used, it is an upper
 984              * limit on the number of oop stack slots
 985              */
 986             int bitsPerInfopoint = numDRegs + numStackSlots;
 987             int intsForBits = (bitsPerInfopoint + 31) / 32;
 988             int numInfopoints = infoList.size();
 989             intsPerInfopoint = intsForBits + 1;  // +1 for the pcoffset
 990             int arraySize = HEADERSIZE + (numInfopoints * intsPerInfopoint);
 991             array = new int[arraySize];
 992             array[INTSPERINFOPOINT_OFST] = intsPerInfopoint;
 993             // compute saveAreaCounts
 994             int saveAreaCounts = (numSRegs & 0xff) + (numDRegs << 8) + (numStackSlots << 16);
 995             array[SAVEAREACOUNTS_OFST] = saveAreaCounts;
 996 
 997             // loop thru the infoList
 998             int infoIndex = 0;
 999             for (Infopoint info : infoList) {
1000                 setOopMapPcOffset(infoIndex, info.pcOffset);
1001                 BytecodeFrame frame = info.debugInfo.frame();
1002                 while (frame != null) {
1003                     for (int i = 0; i < frame.numLocals + frame.numStack; i++) {
1004                         Value val = frame.values[i];
1005                         if (isLegal(val)) {
1006                             if (isRegister(val)) {
1007                                 Register reg = asRegister(val);
1008                                 if (val.getKind().isObject()) {
1009                                     assert (hsailRegConfig.isAllocatableDReg(reg));
1010                                     int bitIndex = reg.encoding();
1011                                     setOopMapBit(infoIndex, bitIndex);
1012                                 }
1013                             } else if (isStackSlot(val)) {
1014                                 StackSlot slot = asStackSlot(val);
1015                                 if (val.getKind().isObject()) {
1016                                     assert (HSAIL.getStackOffsetStart(slot, 64) % 8 == 0);
1017                                     int bitIndex = numDRegs + HSAIL.getStackOffsetStart(slot, 64) / 8;
1018                                     setOopMapBit(infoIndex, bitIndex);
1019                                 }
1020                             }
1021                         }
1022                     }
1023                     frame = frame.caller();
1024                 }
1025                 infoIndex++;
1026             }
1027             try (Scope s = Debug.scope("CodeGen")) {
1028                 if (Debug.isLogEnabled()) {
1029                     Debug.log("numSRegs=%d, numDRegs=%d, numStackSlots=%d", numSRegs, numDRegs, numStackSlots);
1030                     // show infopoint oopmap details
1031                     for (infoIndex = 0; infoIndex < infoList.size(); infoIndex++) {
1032                         String infoString = "Infopoint " + infoIndex + ", pcOffset=" + getOopMapPcOffset(infoIndex) + ",   oopmap=";
1033                         for (int i = 0; i < intsForBits; i++) {
1034                             infoString += (i != 0 ? ", " : "") + Integer.toHexString(getOopMapBitsAsInt(infoIndex, i));
1035                         }
1036                         Debug.log(infoString);
1037                     }
1038                 }
1039             } catch (Throwable e) {
1040                 throw Debug.handle(e);
1041             }
1042 
1043             return array;
1044         }
1045 
1046         private void setOopMapPcOffset(int infoIndex, int pcOffset) {
1047             int arrIndex = HEADERSIZE + infoIndex * intsPerInfopoint;
1048             array[arrIndex] = pcOffset;
1049         }
1050 
1051         private int getOopMapPcOffset(int infoIndex) {
1052             int arrIndex = HEADERSIZE + infoIndex * intsPerInfopoint;
1053             return array[arrIndex];
1054         }
1055 
1056         private void setOopMapBit(int infoIndex, int bitIndex) {
1057             int arrIndex = HEADERSIZE + infoIndex * intsPerInfopoint + 1 + bitIndex / 32;
1058             array[arrIndex] |= (1 << (bitIndex % 32));
1059         }
1060 
1061         private int getOopMapBitsAsInt(int infoIndex, int intIndex) {
1062             int arrIndex = HEADERSIZE + infoIndex * intsPerInfopoint + 1 + intIndex;
1063             return array[arrIndex];
1064         }
1065     }
1066 
1067     private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizingOp> deopts, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs) {
1068         if (deopts.isEmpty()) {
1069             return null;
1070         }
1071         StructuredGraph hostGraph = new StructuredGraph(method, -2);
1072         ParameterNode deoptId = hostGraph.unique(new ParameterNode(0, StampFactory.intValue()));
1073         ParameterNode hsailFrame = hostGraph.unique(new ParameterNode(1, StampFactory.forKind(providers.getCodeCache().getTarget().wordKind)));
1074         ParameterNode reasonAndAction = hostGraph.unique(new ParameterNode(2, StampFactory.intValue()));
1075         ParameterNode speculation = hostGraph.unique(new ParameterNode(3, StampFactory.object()));
1076         BeginNode[] branches = new BeginNode[deopts.size() + 1];
1077         int[] keys = new int[deopts.size()];
1078         int[] keySuccessors = new int[deopts.size() + 1];
1079         double[] keyProbabilities = new double[deopts.size() + 1];
1080         int i = 0;
1081         Collections.sort(deopts, new Comparator<DeoptimizingOp>() {
1082             public int compare(DeoptimizingOp o1, DeoptimizingOp o2) {
1083                 return o1.getCodeBufferPos() - o2.getCodeBufferPos();
1084             }
1085         });
1086         for (DeoptimizingOp deopt : deopts) {
1087             keySuccessors[i] = i;
1088             keyProbabilities[i] = 1.0 / deopts.size();
1089             keys[i] = deopt.getCodeBufferPos();
1090             assert keys[i] >= 0;
1091             branches[i] = createHostDeoptBranch(deopt, hsailFrame, reasonAndAction, speculation, providers, config, numSRegs, numDRegs);
1092 
1093             i++;
1094         }
1095         keyProbabilities[deopts.size()] = 0; // default
1096         keySuccessors[deopts.size()] = deopts.size();
1097         branches[deopts.size()] = createHostCrashBranch(hostGraph, deoptId);
1098         IntegerSwitchNode switchNode = hostGraph.add(new IntegerSwitchNode(deoptId, branches, keys, keyProbabilities, keySuccessors));
1099         StartNode start = hostGraph.start();
1100         start.setNext(switchNode);
1101         /*
1102          * printf.setNext(printf2); printf2.setNext(switchNode);
1103          */
1104         hostGraph.setGuardsStage(GuardsStage.AFTER_FSA);
1105         return hostGraph;
1106     }
1107 
1108     private static BeginNode createHostCrashBranch(StructuredGraph hostGraph, ValueNode deoptId) {
1109         VMErrorNode vmError = hostGraph.add(new VMErrorNode("Error in HSAIL deopt. DeoptId=%d", deoptId));
1110         // ConvertNode.convert(hostGraph, Kind.Long, deoptId)));
1111         vmError.setNext(hostGraph.add(new ReturnNode(ConstantNode.defaultForKind(hostGraph.method().getSignature().getReturnKind(), hostGraph))));
1112         return BeginNode.begin(vmError);
1113     }
1114 
1115     private static BeginNode createHostDeoptBranch(DeoptimizingOp deopt, ParameterNode hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers,
1116                     HotSpotVMConfig config, int numSRegs, int numDRegs) {
1117         BeginNode branch = hsailFrame.graph().add(new BeginNode());
1118         DynamicDeoptimizeNode deoptimization = hsailFrame.graph().add(new DynamicDeoptimizeNode(reasonAndAction, speculation));
1119         deoptimization.setStateBefore(createFrameState(deopt.getFrameState().topFrame, hsailFrame, providers, config, numSRegs, numDRegs));
1120         branch.setNext(deoptimization);
1121         return branch;
1122     }
1123 
1124     private static FrameState createFrameState(BytecodeFrame lowLevelFrame, ParameterNode hsailFrame, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs) {
1125         return createFrameState(lowLevelFrame, hsailFrame, providers, config, numSRegs, numDRegs, new HashMap<VirtualObject, VirtualObjectNode>());
1126     }
1127 
1128     private static FrameState createFrameState(BytecodeFrame lowLevelFrame, ParameterNode hsailFrame, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs,
1129                     Map<VirtualObject, VirtualObjectNode> virtualObjects) {
1130         FrameState outterFrameState = null;
1131         if (lowLevelFrame.caller() != null) {
1132             outterFrameState = createFrameState(lowLevelFrame.caller(), hsailFrame, providers, config, numSRegs, numDRegs, virtualObjects);
1133         }
1134         StructuredGraph hostGraph = hsailFrame.graph();
1135         Function<? super Value, ? extends ValueNode> lirValueToHirNode = v -> getNodeForValueFromFrame(v, hsailFrame, hostGraph, providers, config, numSRegs, numDRegs, virtualObjects);
1136         ValueNode[] locals = new ValueNode[lowLevelFrame.numLocals];
1137         for (int i = 0; i < lowLevelFrame.numLocals; i++) {
1138             locals[i] = lirValueToHirNode.apply(lowLevelFrame.getLocalValue(i));
1139         }
1140         List<ValueNode> stack = new ArrayList<>(lowLevelFrame.numStack);
1141         for (int i = 0; i < lowLevelFrame.numStack; i++) {
1142             stack.add(lirValueToHirNode.apply(lowLevelFrame.getStackValue(i)));
1143         }
1144         ValueNode[] locks = new ValueNode[lowLevelFrame.numLocks];
1145         MonitorIdNode[] monitorIds = new MonitorIdNode[lowLevelFrame.numLocks];
1146         for (int i = 0; i < lowLevelFrame.numLocks; i++) {
1147             HotSpotMonitorValue lockValue = (HotSpotMonitorValue) lowLevelFrame.getLockValue(i);
1148             locks[i] = lirValueToHirNode.apply(lockValue);
1149             monitorIds[i] = getMonitorIdForHotSpotMonitorValueFromFrame(lockValue, hsailFrame, hostGraph);
1150         }
1151         FrameState frameState = hostGraph.add(new FrameState(lowLevelFrame.getMethod(), lowLevelFrame.getBCI(), locals, stack, locks, monitorIds, lowLevelFrame.rethrowException, false));
1152         if (outterFrameState != null) {
1153             frameState.setOuterFrameState(outterFrameState);
1154         }
1155         Map<VirtualObject, VirtualObjectNode> virtualObjectsCopy;
1156         // TODO this could be implemented more efficiently with a mark into the map
1157         // unfortunately LinkedHashMap doesn't seem to provide that.
1158         List<VirtualObjectState> virtualStates = new ArrayList<>(virtualObjects.size());
1159         do {
1160             virtualObjectsCopy = new HashMap<>(virtualObjects);
1161             virtualStates.clear();
1162             for (Entry<VirtualObject, VirtualObjectNode> entry : virtualObjectsCopy.entrySet()) {
1163                 VirtualObject virtualObject = entry.getKey();
1164                 VirtualObjectNode virtualObjectNode = entry.getValue();
1165                 List<ValueNode> fieldValues = Arrays.stream(virtualObject.getValues()).map(lirValueToHirNode).collect(Collectors.toList());
1166                 virtualStates.add(new VirtualObjectState(virtualObjectNode, fieldValues));
1167             }
1168             // New virtual objects may have been discovered while processing the previous set.
1169             // Wait until a fixed point is reached
1170         } while (virtualObjectsCopy.size() < virtualObjects.size());
1171         virtualStates.forEach(vos -> frameState.addVirtualObjectMapping(hostGraph.unique(vos)));
1172         return frameState;
1173     }
1174 
1175     @SuppressWarnings("unused")
1176     private static MonitorIdNode getMonitorIdForHotSpotMonitorValueFromFrame(HotSpotMonitorValue lockValue, ParameterNode hsailFrame, StructuredGraph hsailGraph) {
1177         if (lockValue.isEliminated()) {
1178             return null;
1179         }
1180         throw GraalInternalError.unimplemented();
1181     }
1182 
1183     private static ValueNode getNodeForValueFromFrame(Value localValue, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs,
1184                     int numDRegs, Map<VirtualObject, VirtualObjectNode> virtualObjects) {
1185         ValueNode valueNode;
1186         if (localValue instanceof Constant) {
1187             valueNode = ConstantNode.forConstant((Constant) localValue, providers.getMetaAccess(), hostGraph);
1188         } else if (localValue instanceof VirtualObject) {
1189             valueNode = getNodeForVirtualObjectFromFrame((VirtualObject) localValue, virtualObjects, hostGraph);
1190         } else if (localValue instanceof StackSlot) {
1191             StackSlot slot = (StackSlot) localValue;
1192             valueNode = getNodeForStackSlotFromFrame(slot, localValue.getKind(), hsailFrame, hostGraph, providers, config, numSRegs, numDRegs);
1193         } else if (localValue instanceof HotSpotMonitorValue) {
1194             HotSpotMonitorValue hotSpotMonitorValue = (HotSpotMonitorValue) localValue;
1195             return getNodeForValueFromFrame(hotSpotMonitorValue.getOwner(), hsailFrame, hostGraph, providers, config, numSRegs, numDRegs, virtualObjects);
1196         } else if (localValue instanceof RegisterValue) {
1197             RegisterValue registerValue = (RegisterValue) localValue;
1198             int regNumber = registerValue.getRegister().number;
1199             valueNode = getNodeForRegisterFromFrame(regNumber, localValue.getKind(), hsailFrame, hostGraph, providers, config, numSRegs);
1200         } else if (Value.ILLEGAL.equals(localValue)) {
1201             valueNode = null;
1202         } else {
1203             throw GraalInternalError.shouldNotReachHere();
1204         }
1205         return valueNode;
1206     }
1207 
1208     private static ValueNode getNodeForVirtualObjectFromFrame(VirtualObject virtualObject, Map<VirtualObject, VirtualObjectNode> virtualObjects, StructuredGraph hostGraph) {
1209         return virtualObjects.computeIfAbsent(virtualObject, vo -> {
1210             if (vo.getType().isArray()) {
1211                 return hostGraph.add(new VirtualArrayNode(vo.getType().getComponentType(), vo.getValues().length));
1212             } else {
1213                 return hostGraph.add(new VirtualInstanceNode(vo.getType(), true));
1214             }
1215         });
1216     }
1217 
1218     private static ValueNode getNodeForRegisterFromFrame(int regNumber, Kind valueKind, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config,
1219                     int numSRegs) {
1220         ValueNode valueNode;
1221         LocationNode location;
1222         int longSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Long);
1223         int intSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Int);
1224         if (regNumber >= HSAIL.s0.number && regNumber <= HSAIL.s31.number) {
1225             long offset = config.hsailFrameHeaderSize + intSize * (regNumber - HSAIL.s0.number);
1226             location = ConstantLocationNode.create(FINAL_LOCATION, valueKind, offset, hostGraph);
1227         } else if (regNumber >= HSAIL.d0.number && regNumber <= HSAIL.d15.number) {
1228             long offset = config.hsailFrameHeaderSize + intSize * numSRegs + longSize * (regNumber - HSAIL.d0.number);
1229             location = ConstantLocationNode.create(FINAL_LOCATION, valueKind, offset, hostGraph);
1230         } else {
1231             throw GraalInternalError.shouldNotReachHere("unknown hsail register: " + regNumber);
1232         }
1233         valueNode = hostGraph.unique(new FloatingReadNode(hsailFrame, location, null, StampFactory.forKind(valueKind)));
1234         return valueNode;
1235     }
1236 
1237     private static ValueNode getNodeForStackSlotFromFrame(StackSlot slot, Kind valueKind, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config,
1238                     int numSRegs, int numDRegs) {
1239         int slotSizeInBits = (valueKind == Kind.Object ? 64 : valueKind.getByteCount() * 8);
1240         if ((slotSizeInBits == 32) || (slotSizeInBits == 64)) {
1241             int longSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Long);
1242             int intSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Int);
1243             long offset = config.hsailFrameHeaderSize + (intSize * numSRegs) + (longSize * numDRegs) + HSAIL.getStackOffsetStart(slot, slotSizeInBits);
1244             LocationNode location = ConstantLocationNode.create(FINAL_LOCATION, valueKind, offset, hostGraph);
1245             ValueNode valueNode = hostGraph.unique(new FloatingReadNode(hsailFrame, location, null, StampFactory.forKind(valueKind)));
1246             return valueNode;
1247         } else {
1248             throw GraalInternalError.shouldNotReachHere("unsupported stack slot kind: " + valueKind);
1249         }
1250     }
1251 }