1 /* 2 * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package com.oracle.graal.hotspot.hsail; 24 25 import static com.oracle.graal.api.code.CallingConvention.Type.*; 26 import static com.oracle.graal.api.code.CodeUtil.*; 27 import static com.oracle.graal.api.code.ValueUtil.*; 28 import static com.oracle.graal.api.meta.LocationIdentity.*; 29 import static com.oracle.graal.compiler.GraalCompiler.*; 30 import static com.oracle.graal.hotspot.hsail.HSAILHotSpotBackend.Options.*; 31 import static com.oracle.graal.hotspot.hsail.replacements.HSAILNewObjectSnippets.Options.*; 32 33 import java.lang.reflect.*; 34 import java.util.*; 35 import java.util.Map.Entry; 36 import java.util.function.*; 37 import java.util.stream.*; 38 39 import com.amd.okra.*; 40 import com.oracle.graal.api.code.*; 41 import com.oracle.graal.api.code.Assumptions.Assumption; 42 import com.oracle.graal.api.code.CallingConvention.Type; 43 import com.oracle.graal.api.code.CompilationResult.Call; 44 import com.oracle.graal.api.code.CompilationResult.CodeAnnotation; 45 import com.oracle.graal.api.code.CompilationResult.DataPatch; 46 import com.oracle.graal.api.code.CompilationResult.ExceptionHandler; 47 import com.oracle.graal.api.code.CompilationResult.Infopoint; 48 import com.oracle.graal.api.code.CompilationResult.Mark; 49 import com.oracle.graal.api.meta.*; 50 import com.oracle.graal.asm.*; 51 import com.oracle.graal.asm.hsail.*; 52 import com.oracle.graal.compiler.common.*; 53 import com.oracle.graal.compiler.common.cfg.*; 54 import com.oracle.graal.compiler.common.type.*; 55 import com.oracle.graal.debug.*; 56 import com.oracle.graal.debug.Debug.Scope; 57 import com.oracle.graal.gpu.*; 58 import com.oracle.graal.hotspot.*; 59 import com.oracle.graal.hotspot.bridge.CompilerToVM.CodeInstallResult; 60 import com.oracle.graal.hotspot.meta.*; 61 import com.oracle.graal.hotspot.nodes.*; 62 import com.oracle.graal.hsail.*; 63 import com.oracle.graal.java.*; 64 import com.oracle.graal.lir.*; 65 import com.oracle.graal.lir.asm.*; 66 import com.oracle.graal.lir.gen.*; 67 import com.oracle.graal.lir.hsail.*; 68 import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp; 69 import com.oracle.graal.nodes.*; 70 import com.oracle.graal.nodes.StructuredGraph.GuardsStage; 71 import com.oracle.graal.nodes.extended.*; 72 import com.oracle.graal.nodes.java.*; 73 import com.oracle.graal.nodes.spi.*; 74 import com.oracle.graal.nodes.virtual.*; 75 import com.oracle.graal.options.*; 76 import com.oracle.graal.phases.*; 77 import com.oracle.graal.phases.tiers.*; 78 import com.oracle.graal.virtual.nodes.*; 79 80 /** 81 * HSAIL specific backend. 82 */ 83 public class HSAILHotSpotBackend extends HotSpotBackend { 84 85 public static class Options { 86 87 // @formatter:off 88 @Option(help = "Number of donor threads for HSAIL kernel dispatch") 89 static public final OptionValue<Integer> HsailDonorThreads = new OptionValue<>(4); 90 // @formatter:on 91 } 92 93 private Map<String, String> paramTypeMap = new HashMap<>(); 94 private final boolean deviceInitialized; 95 // TODO: get maximum Concurrency from okra 96 private int maxDeoptIndex = 8 * 40 * 64; // see gpu_hsail.hpp 97 98 public HSAILHotSpotBackend(HotSpotGraalRuntime runtime, HotSpotProviders providers) { 99 super(runtime, providers); 100 paramTypeMap.put("HotSpotResolvedPrimitiveType<int>", "s32"); 101 paramTypeMap.put("HotSpotResolvedPrimitiveType<float>", "f32"); 102 paramTypeMap.put("HotSpotResolvedPrimitiveType<double>", "f64"); 103 paramTypeMap.put("HotSpotResolvedPrimitiveType<long>", "s64"); 104 105 /* 106 * The order of the conjunction below is important: the OkraUtil call may provision the 107 * native library required by the initialize() call 108 */ 109 deviceInitialized = OkraUtil.okraLibExists() && initialize(); 110 } 111 112 @Override 113 public boolean shouldAllocateRegisters() { 114 return true; 115 } 116 117 /** 118 * Initializes the GPU device. 119 * 120 * @return whether or not initialization was successful 121 */ 122 private static native boolean initialize(); 123 124 /** 125 * Determines if the GPU device (or simulator) is available and initialized. 126 */ 127 public boolean isDeviceInitialized() { 128 return deviceInitialized; 129 } 130 131 /** 132 * Completes the initialization of the HSAIL backend. This includes initializing the providers 133 * and registering any method substitutions specified by the HSAIL backend. 134 */ 135 @Override 136 public void completeInitialization() { 137 final HotSpotProviders providers = getProviders(); 138 HotSpotVMConfig config = getRuntime().getConfig(); 139 // Initialize the lowering provider. 140 final HotSpotLoweringProvider lowerer = (HotSpotLoweringProvider) providers.getLowerer(); 141 lowerer.initialize(providers, config); 142 143 // Register the replacements used by the HSAIL backend. 144 HSAILHotSpotReplacementsImpl replacements = (HSAILHotSpotReplacementsImpl) providers.getReplacements(); 145 replacements.completeInitialization(); 146 } 147 148 /** 149 * Compiles and installs a given method to a GPU binary. 150 */ 151 public HotSpotNmethod compileAndInstallKernel(Method method) { 152 ResolvedJavaMethod javaMethod = getProviders().getMetaAccess().lookupJavaMethod(method); 153 HotSpotNmethod nm = installKernel(javaMethod, compileKernel(javaMethod, true)); 154 try (Scope s = Debug.scope("HostCodeGen")) { 155 if (Debug.isLogEnabled()) { 156 DisassemblerProvider dis = getRuntime().getHostBackend().getDisassembler(); 157 if (dis != null) { 158 String disasm = dis.disassemble(nm); 159 Debug.log("host code generated for %s%n%s", javaMethod, disasm); 160 } else { 161 Debug.log("host code disassembler is null"); 162 } 163 } 164 } catch (Throwable e) { 165 throw Debug.handle(e); 166 } 167 return nm; 168 } 169 170 /** 171 * Compiles a given method to HSAIL code. 172 * 173 * @param makeBinary specifies whether a GPU binary should also be generated for the HSAIL code. 174 * If true, the returned value is guaranteed to have a non-zero 175 * {@linkplain ExternalCompilationResult#getEntryPoint() entry point}. 176 * @return the HSAIL code compiled from {@code method}'s bytecode 177 */ 178 public ExternalCompilationResult compileKernel(ResolvedJavaMethod method, boolean makeBinary) { 179 StructuredGraph graph = new StructuredGraph(method); 180 HotSpotProviders providers = getProviders(); 181 MetaAccessProvider metaAccess = getProviders().getMetaAccess(); 182 183 // changed this from default to help us generate deopts when needed 184 OptimisticOptimizations optimisticOpts = OptimisticOptimizations.ALL; 185 optimisticOpts.remove(OptimisticOptimizations.Optimization.UseExceptionProbabilityForOperations); 186 new GraphBuilderPhase.Instance(metaAccess, GraphBuilderConfiguration.getSnippetDefault(), optimisticOpts).apply(graph); 187 PhaseSuite<HighTierContext> graphBuilderSuite = providers.getSuites().getDefaultGraphBuilderSuite(); 188 CallingConvention cc = getCallingConvention(providers.getCodeCache(), Type.JavaCallee, graph.method(), false); 189 190 // append special HSAILNonNullParametersPhase 191 int numArgs = cc.getArguments().length; 192 graphBuilderSuite.appendPhase(new HSAILNonNullParametersPhase(numArgs)); 193 194 Suites suites = providers.getSuites().getDefaultSuites(); 195 ExternalCompilationResult hsailCode = compileGraph(graph, null, cc, method, providers, this, this.getTarget(), null, graphBuilderSuite, optimisticOpts, getProfilingInfo(graph), null, suites, 196 new ExternalCompilationResult(), CompilationResultBuilderFactory.Default); 197 198 // this code added to dump infopoints 199 try (Scope s = Debug.scope("CodeGen")) { 200 if (Debug.isLogEnabled()) { 201 // show infopoints 202 List<Infopoint> infoList = hsailCode.getInfopoints(); 203 Debug.log("%d HSAIL infopoints", infoList.size()); 204 for (Infopoint info : infoList) { 205 Debug.log(info.toString()); 206 Debug.log(info.debugInfo.frame().toString()); 207 } 208 } 209 } catch (Throwable e) { 210 throw Debug.handle(e); 211 } 212 213 if (makeBinary) { 214 if (!deviceInitialized) { 215 throw new GraalInternalError("Cannot generate GPU kernel if device is not initialized"); 216 } 217 try (Scope ds = Debug.scope("GeneratingKernelBinary")) { 218 long kernel = generateKernel(hsailCode.getTargetCode(), method.getName()); 219 if (kernel == 0) { 220 throw new GraalInternalError("Failed to compile HSAIL kernel"); 221 } 222 hsailCode.setEntryPoint(kernel); 223 } catch (Throwable e) { 224 throw Debug.handle(e); 225 } 226 } 227 return hsailCode; 228 } 229 230 private static class HSAILNonNullParametersPhase extends Phase { 231 // we use this to limit the stamping to exclude the final argument in an obj stream method 232 private int numArgs; 233 234 public HSAILNonNullParametersPhase(int numArgs) { 235 this.numArgs = numArgs; 236 } 237 238 @Override 239 protected void run(StructuredGraph graph) { 240 int argCount = 0; 241 Stamp nonNull = StampFactory.objectNonNull(); 242 for (ParameterNode param : graph.getNodes(ParameterNode.class)) { 243 argCount++; 244 if (argCount < numArgs && param.stamp() instanceof ObjectStamp) { 245 ObjectStamp paramStamp = (ObjectStamp) param.stamp(); 246 param.setStamp(paramStamp.join(nonNull)); 247 } 248 } 249 } 250 } 251 252 /** 253 * Generates a GPU binary from HSAIL code. 254 */ 255 static native long generateKernel(byte[] hsailCode, String name); 256 257 /** 258 * Installs the {@linkplain ExternalCompilationResult#getEntryPoint() GPU binary} associated 259 * with some given HSAIL code in the code cache and returns a {@link HotSpotNmethod} handle to 260 * the installed code. 261 * 262 * @param hsailCode HSAIL compilation result for which a GPU binary has been generated 263 * @return a handle to the binary as installed in the HotSpot code cache 264 */ 265 public final HotSpotNmethod installKernel(ResolvedJavaMethod method, ExternalCompilationResult hsailCode) { 266 assert hsailCode.getEntryPoint() != 0L; 267 // Code here based on HotSpotCodeCacheProvider.addExternalMethod(). 268 HotSpotResolvedJavaMethod javaMethod = (HotSpotResolvedJavaMethod) method; 269 if (hsailCode.getId() == -1) { 270 hsailCode.setId(javaMethod.allocateCompileId(hsailCode.getEntryBCI())); 271 } 272 CompilationResult compilationResult = hsailCode; 273 StructuredGraph hostGraph = hsailCode.getHostGraph(); 274 if (hostGraph != null) { 275 // TODO get rid of the unverified entry point in the host code 276 try (Scope ds = Debug.scope("GeneratingHostGraph", new DebugDumpScope("HostGraph"))) { 277 HotSpotBackend hostBackend = getRuntime().getHostBackend(); 278 JavaType[] parameterTypes = new JavaType[hostGraph.getNodes(ParameterNode.class).count()]; 279 Debug.log("Param count: %d", parameterTypes.length); 280 for (int i = 0; i < parameterTypes.length; i++) { 281 ParameterNode parameter = hostGraph.getParameter(i); 282 Debug.log("Param [%d]=%s", i, parameter); 283 parameterTypes[i] = parameter.stamp().javaType(hostBackend.getProviders().getMetaAccess()); 284 Debug.log(" %s", parameterTypes[i]); 285 } 286 CallingConvention cc = hostBackend.getProviders().getCodeCache().getRegisterConfig().getCallingConvention(Type.JavaCallee, method.getSignature().getReturnType(null), parameterTypes, 287 hostBackend.getTarget(), false); 288 CompilationResult hostCode = compileGraph(hostGraph, null, cc, method, hostBackend.getProviders(), hostBackend, this.getTarget(), null, 289 hostBackend.getProviders().getSuites().getDefaultGraphBuilderSuite(), OptimisticOptimizations.NONE, null, null, 290 hostBackend.getProviders().getSuites().getDefaultSuites(), new CompilationResult(), CompilationResultBuilderFactory.Default); 291 compilationResult = merge(hostCode, hsailCode); 292 } catch (Throwable e) { 293 throw Debug.handle(e); 294 } 295 } 296 297 HSAILHotSpotNmethod code = new HSAILHotSpotNmethod(javaMethod, hsailCode.getName(), false, true); 298 code.setOopMapArray(hsailCode.getOopMapArray()); 299 code.setUsesAllocationFlag(hsailCode.getUsesAllocationFlag()); 300 HotSpotCompiledNmethod compiled = new HotSpotCompiledNmethod(getTarget(), javaMethod, compilationResult); 301 CodeInstallResult result = getRuntime().getCompilerToVM().installCode(compiled, code, null); 302 if (result != CodeInstallResult.OK) { 303 return null; 304 } 305 return code; 306 } 307 308 private static ExternalCompilationResult merge(CompilationResult hostCode, ExternalCompilationResult hsailCode) { 309 ExternalCompilationResult result = new ExternalCompilationResult(); 310 311 // from hsail code 312 result.setEntryPoint(hsailCode.getEntryPoint()); 313 result.setId(hsailCode.getId()); 314 result.setEntryBCI(hsailCode.getEntryBCI()); 315 assert hsailCode.getMarks().isEmpty(); 316 assert hsailCode.getExceptionHandlers().isEmpty(); 317 assert hsailCode.getDataReferences().isEmpty(); 318 319 // from host code 320 result.setTotalFrameSize(hostCode.getTotalFrameSize()); 321 result.setCustomStackAreaOffset(hostCode.getCustomStackAreaOffset()); 322 result.setTargetCode(hostCode.getTargetCode(), hostCode.getTargetCodeSize()); 323 for (CodeAnnotation annotation : hostCode.getAnnotations()) { 324 result.addAnnotation(annotation); 325 } 326 for (Mark mark : hostCode.getMarks()) { 327 result.recordMark(mark.pcOffset, mark.id); 328 } 329 for (ExceptionHandler handler : hostCode.getExceptionHandlers()) { 330 result.recordExceptionHandler(handler.pcOffset, handler.handlerPos); 331 } 332 for (DataPatch patch : hostCode.getDataReferences()) { 333 if (patch.data != null) { 334 if (patch.inline) { 335 result.recordInlineData(patch.pcOffset, patch.data); 336 } else { 337 result.recordDataReference(patch.pcOffset, patch.data); 338 } 339 } 340 } 341 for (Infopoint infopoint : hostCode.getInfopoints()) { 342 if (infopoint instanceof Call) { 343 Call call = (Call) infopoint; 344 result.recordCall(call.pcOffset, call.size, call.target, call.debugInfo, call.direct); 345 } else { 346 result.recordInfopoint(infopoint.pcOffset, infopoint.debugInfo, infopoint.reason); 347 } 348 } 349 350 // merged 351 Assumptions mergedAssumptions = new Assumptions(true); 352 if (hostCode.getAssumptions() != null) { 353 for (Assumption assumption : hostCode.getAssumptions().getAssumptions()) { 354 if (assumption != null) { 355 mergedAssumptions.record(assumption); 356 } 357 } 358 } 359 if (hsailCode.getAssumptions() != null) { 360 for (Assumption assumption : hsailCode.getAssumptions().getAssumptions()) { 361 if (assumption != null) { 362 mergedAssumptions.record(assumption); 363 } 364 } 365 } 366 if (!mergedAssumptions.isEmpty()) { 367 result.setAssumptions(mergedAssumptions); 368 } 369 return result; 370 } 371 372 private static final ThreadLocal<DonorThreadPool> donorThreadPool = new ThreadLocal<DonorThreadPool>() { 373 @Override 374 protected DonorThreadPool initialValue() { 375 return new DonorThreadPool(); 376 } 377 }; 378 379 public boolean executeKernel(HotSpotInstalledCode kernel, int jobSize, Object[] args) throws InvalidInstalledCodeException { 380 if (!deviceInitialized) { 381 throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized"); 382 } 383 int[] oopMapArray = ((HSAILHotSpotNmethod) kernel).getOopMapArray(); 384 385 // Pass donorThreadPoolArray if this kernel uses allocation, otherwise null 386 Thread[] donorThreadArray = ((HSAILHotSpotNmethod) kernel).getUsesAllocationFlag() ? donorThreadPool.get().getThreads() : null; 387 return executeKernel0(kernel, jobSize, args, donorThreadArray, HsailAllocBytesPerWorkitem.getValue(), oopMapArray); 388 } 389 390 private static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args, Thread[] donorThreads, int allocBytesPerWorkitem, int[] oopMapArray) 391 throws InvalidInstalledCodeException; 392 393 /** 394 * Use the HSAIL register set when the compilation target is HSAIL. 395 */ 396 @Override 397 public FrameMap newFrameMap(RegisterConfig registerConfig) { 398 return new HSAILFrameMap(getCodeCache(), registerConfig); 399 } 400 401 @Override 402 public LIRGeneratorTool newLIRGenerator(CallingConvention cc, LIRGenerationResult lirGenRes) { 403 return new HSAILHotSpotLIRGenerator(getProviders(), getRuntime().getConfig(), cc, lirGenRes); 404 } 405 406 @Override 407 public LIRGenerationResult newLIRGenerationResult(LIR lir, FrameMap frameMap, ResolvedJavaMethod method, Object stub) { 408 return new HSAILHotSpotLIRGenerationResult(lir, frameMap); 409 } 410 411 @Override 412 public NodeLIRBuilderTool newNodeLIRBuilder(StructuredGraph graph, LIRGeneratorTool lirGen) { 413 return new HSAILHotSpotNodeLIRBuilder(graph, lirGen); 414 } 415 416 class HotSpotFrameContext implements FrameContext { 417 418 public boolean hasFrame() { 419 return true; 420 } 421 422 @Override 423 public void enter(CompilationResultBuilder crb) { 424 Debug.log("Nothing to do here"); 425 } 426 427 @Override 428 public void leave(CompilationResultBuilder crb) { 429 Debug.log("Nothing to do here"); 430 } 431 } 432 433 /** 434 * a class to allow us to save lirGen. 435 */ 436 static class HSAILCompilationResultBuilder extends CompilationResultBuilder { 437 public HSAILHotSpotLIRGenerationResult lirGenRes; 438 439 public HSAILCompilationResultBuilder(CodeCacheProvider codeCache, ForeignCallsProvider foreignCalls, FrameMap frameMap, Assembler asm, FrameContext frameContext, 440 CompilationResult compilationResult, HSAILHotSpotLIRGenerationResult lirGenRes) { 441 super(codeCache, foreignCalls, frameMap, asm, frameContext, compilationResult); 442 this.lirGenRes = lirGenRes; 443 } 444 } 445 446 static class HSAILHotSpotNmethod extends HotSpotNmethod { 447 private int[] oopMapArray; 448 private boolean usesAllocation; 449 450 HSAILHotSpotNmethod(HotSpotResolvedJavaMethod method, String name, boolean isDefault, boolean isExternal) { 451 super(method, name, isDefault, isExternal); 452 } 453 454 void setOopMapArray(int[] array) { 455 oopMapArray = array; 456 } 457 458 int[] getOopMapArray() { 459 return oopMapArray; 460 } 461 462 public void setUsesAllocationFlag(boolean val) { 463 usesAllocation = val; 464 } 465 466 public boolean getUsesAllocationFlag() { 467 return usesAllocation; 468 } 469 } 470 471 @Override 472 protected Assembler createAssembler(FrameMap frameMap) { 473 return new HSAILHotSpotAssembler(getTarget()); 474 } 475 476 @Override 477 public CompilationResultBuilder newCompilationResultBuilder(LIRGenerationResult lirGenRes, CompilationResult compilationResult, CompilationResultBuilderFactory factory) { 478 FrameMap frameMap = lirGenRes.getFrameMap(); 479 Assembler masm = createAssembler(frameMap); 480 HotSpotFrameContext frameContext = new HotSpotFrameContext(); 481 // save lirGen for later use by setHostGraph 482 CompilationResultBuilder crb = new HSAILCompilationResultBuilder(getCodeCache(), getForeignCalls(), frameMap, masm, frameContext, compilationResult, 483 (HSAILHotSpotLIRGenerationResult) lirGenRes); 484 crb.setTotalFrameSize(frameMap.totalFrameSize()); 485 return crb; 486 } 487 488 @Override 489 public void emitCode(CompilationResultBuilder crb, LIR lir, ResolvedJavaMethod method) { 490 assert method != null : lir + " is not associated with a method"; 491 Kind wordKind = getProviders().getCodeCache().getTarget().wordKind; 492 LIRKind wordLIRKind = LIRKind.value(wordKind); 493 494 HotSpotVMConfig config = getRuntime().getConfig(); 495 boolean useHSAILDeoptimization = config.useHSAILDeoptimization; 496 boolean useHSAILSafepoints = config.useHSAILSafepoints; 497 498 if ((useHSAILSafepoints == true) && (useHSAILDeoptimization == false)) { 499 Debug.log("+UseHSAILSafepoints requires +UseHSAILDeoptimization"); 500 } 501 502 /* 503 * See what graph nodes we have to see if we are using the thread register. If not, we don't 504 * have to emit the code that sets it up. Maybe there is a better way to do this? 505 */ 506 boolean usesAllocation = false; 507 search: for (AbstractBlock<?> b : lir.linearScanOrder()) { 508 for (LIRInstruction op : lir.getLIRforBlock(b)) { 509 if ((op instanceof HSAILMove.LoadOp) && ((HSAILMove.LoadOp) op).usesThreadRegister()) { 510 usesAllocation = true; 511 assert useHSAILDeoptimization : "cannot use thread register if HSAIL deopt support is disabled"; 512 break search; 513 } 514 } 515 } 516 // save usesAllocation flag in ExternalCompilationResult 517 ((ExternalCompilationResult) crb.compilationResult).setUsesAllocationFlag(usesAllocation); 518 519 // Emit the prologue. 520 HSAILAssembler asm = (HSAILAssembler) crb.asm; 521 asm.emitString0("version 0:95: $full : $large;\n"); 522 523 Signature signature = method.getSignature(); 524 int sigParamCount = signature.getParameterCount(false); 525 // We're subtracting 1 because we're not making the final gid as a parameter. 526 527 int nonConstantParamCount = sigParamCount - 1; 528 boolean isStatic = (method.isStatic()); 529 // Determine if this is an object lambda. 530 boolean isObjectLambda = true; 531 532 if (signature.getParameterType(nonConstantParamCount, null).getKind() == Kind.Int) { 533 isObjectLambda = false; 534 } else { 535 // Add space for gid int reg. 536 nonConstantParamCount++; 537 } 538 539 // If this is an instance method, include the "this" parameter 540 if (!isStatic) { 541 nonConstantParamCount++; 542 } 543 // Add in any "constant" parameters (currently none). 544 int totalParamCount = nonConstantParamCount; 545 JavaType[] paramtypes = new JavaType[totalParamCount]; 546 String[] paramNames = new String[totalParamCount]; 547 int pidx = 0; 548 MetaAccessProvider metaAccess = getProviders().getMetaAccess(); 549 for (int i = 0; i < totalParamCount; i++) { 550 if (i == 0 && !isStatic) { 551 paramtypes[i] = metaAccess.lookupJavaType(Object.class); 552 paramNames[i] = "%_this"; 553 } else if (i < nonConstantParamCount) { 554 if (isObjectLambda && (i == (nonConstantParamCount))) { 555 // Set up the gid register mapping. 556 paramtypes[i] = metaAccess.lookupJavaType(int.class); 557 paramNames[i] = "%_gid"; 558 } else { 559 paramtypes[i] = signature.getParameterType(pidx++, null); 560 paramNames[i] = "%_arg" + i; 561 } 562 } 563 } 564 565 asm.emitString0("// " + (isStatic ? "static" : "instance") + " method " + method + "\n"); 566 asm.emitString0("kernel &run ( \n"); 567 568 FrameMap frameMap = crb.frameMap; 569 RegisterConfig regConfig = frameMap.registerConfig; 570 // Build list of param types which does include the gid (for cc register mapping query). 571 JavaType[] ccParamTypes = new JavaType[nonConstantParamCount + 1]; 572 // Include the gid. 573 System.arraycopy(paramtypes, 0, ccParamTypes, 0, nonConstantParamCount); 574 575 /* 576 * Last entry is always int (its register gets used in the workitemabsid instruction). This 577 * is true even for object stream lambdas. 578 */ 579 if (sigParamCount > 0) { 580 ccParamTypes[ccParamTypes.length - 1] = metaAccess.lookupJavaType(int.class); 581 } 582 CallingConvention cc = regConfig.getCallingConvention(JavaCallee, null, ccParamTypes, getTarget(), false); 583 584 /** 585 * Compute the hsail size mappings up to but not including the last non-constant parameter 586 * (which is the gid). 587 * 588 */ 589 String[] paramHsailSizes = new String[totalParamCount]; 590 for (int i = 0; i < totalParamCount; i++) { 591 String paramtypeStr = paramtypes[i].toString(); 592 String sizeStr = paramTypeMap.get(paramtypeStr); 593 // Catch all for any unmapped paramtype that is u64 (address of an object). 594 paramHsailSizes[i] = (sizeStr != null ? sizeStr : "u64"); 595 } 596 // Emit the kernel function parameters. 597 for (int i = 0; i < totalParamCount; i++) { 598 String str = "align 8 kernarg_" + paramHsailSizes[i] + " " + paramNames[i]; 599 600 if (useHSAILDeoptimization || (i != totalParamCount - 1)) { 601 str += ","; 602 } 603 asm.emitString(str); 604 } 605 606 if (useHSAILDeoptimization) { 607 // add in the deoptInfo parameter 608 asm.emitString("kernarg_u64 " + asm.getDeoptInfoName()); 609 } 610 611 asm.emitString(") {"); 612 613 /* 614 * End of parameters start of prolog code. Emit the load instructions for loading of the 615 * kernel non-constant parameters into registers. The constant class parameters will not be 616 * loaded up front but will be loaded as needed. 617 */ 618 for (int i = 0; i < nonConstantParamCount; i++) { 619 asm.emitString("ld_kernarg_" + paramHsailSizes[i] + " " + HSAIL.mapRegister(cc.getArgument(i)) + ", [" + paramNames[i] + "];"); 620 } 621 622 /* 623 * Emit the workitemaid instruction for loading the hidden gid parameter. This is assigned 624 * the register as if it were the last of the nonConstant parameters. 625 */ 626 String workItemReg = "$s" + Integer.toString(asRegister(cc.getArgument(nonConstantParamCount)).encoding()); 627 asm.emitString("workitemabsid_u32 " + workItemReg + ", 0;"); 628 629 final String deoptInProgressLabel = "@LHandleDeoptInProgress"; 630 631 if (useHSAILDeoptimization) { 632 // Aliases for d16 633 RegisterValue d16_deoptInfo = HSAIL.d16.asValue(wordLIRKind); 634 635 // Aliases for d17 636 RegisterValue d17_donorThreadIndex = HSAIL.d17.asValue(wordLIRKind); 637 RegisterValue d17_safepointFlagAddrIndex = d17_donorThreadIndex; 638 639 // Aliases for s34 640 RegisterValue s34_deoptOccurred = HSAIL.s34.asValue(LIRKind.value(Kind.Int)); 641 RegisterValue s34_donorThreadIndex = s34_deoptOccurred; 642 643 asm.emitLoadKernelArg(d16_deoptInfo, asm.getDeoptInfoName(), "u64"); 644 asm.emitComment("// Check if a deopt or safepoint has occurred and abort if true before doing any work"); 645 646 if (useHSAILSafepoints) { 647 // Load address of _notice_safepoints field 648 asm.emitLoad(wordKind, d17_safepointFlagAddrIndex, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailNoticeSafepointsOffset).toAddress()); 649 // Load int value from that field 650 asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(wordLIRKind, d17_safepointFlagAddrIndex, 0).toAddress()); 651 asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false); 652 asm.cbr(deoptInProgressLabel); 653 } 654 asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailDeoptOccurredOffset).toAddress()); 655 asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false); 656 asm.cbr(deoptInProgressLabel); 657 // load thread register if this kernel performs allocation 658 if (usesAllocation) { 659 RegisterValue threadReg = getProviders().getRegisters().getThreadRegister().asValue(wordLIRKind); 660 assert HsailDonorThreads.getValue() > 0; 661 asm.emitLoad(wordKind, threadReg, new HSAILAddressValue(wordLIRKind, d16_deoptInfo, config.hsailCurTlabInfoOffset).toAddress()); 662 if (HsailDonorThreads.getValue() != 1) { 663 asm.emitComment("// map workitem to a donor thread"); 664 asm.emitString(String.format("rem_u32 $%s, %s, %d;", s34_donorThreadIndex.getRegister(), workItemReg, HsailDonorThreads.getValue())); 665 asm.emitConvert(d17_donorThreadIndex, s34_donorThreadIndex, wordKind, Kind.Int); 666 asm.emit("mad", threadReg, d17_donorThreadIndex, Constant.forInt(8), threadReg); 667 } else { 668 // workitem is already mapped to solitary donor thread 669 } 670 asm.emitComment("// $" + getProviders().getRegisters().getThreadRegister() + " will point to holder of tlab thread info for this workitem"); 671 } 672 } 673 674 /* 675 * Note the logic used for this spillseg size is to leave space and then go back and patch 676 * in the correct size once we have generated all the instructions. This should probably be 677 * done in a more robust way by implementing something like asm.insertString. 678 */ 679 int spillsegDeclarationPosition = asm.position() + 1; 680 String spillsegTemplate = "align 4 spill_u8 %spillseg[123456];"; 681 asm.emitString(spillsegTemplate); 682 // Emit object array load prologue here. 683 if (isObjectLambda) { 684 boolean useCompressedOops = config.useCompressedOops; 685 final int arrayElementsOffset = HotSpotGraalRuntime.getArrayBaseOffset(wordKind); 686 String iterationObjArgReg = HSAIL.mapRegister(cc.getArgument(nonConstantParamCount - 1)); 687 /* 688 * iterationObjArgReg will be the highest $d register in use (it is the last parameter) 689 * so tempReg can be the next higher $d register 690 */ 691 String tmpReg = "$d" + (asRegister(cc.getArgument(nonConstantParamCount - 1)).encoding() + 1); 692 // Convert gid to long. 693 asm.emitString("cvt_u64_s32 " + tmpReg + ", " + workItemReg + "; // Convert gid to long"); 694 // Adjust index for sizeof ref. Where to pull this size from? 695 asm.emitString("mul_u64 " + tmpReg + ", " + tmpReg + ", " + (useCompressedOops ? 4 : 8) + "; // Adjust index for sizeof ref"); 696 // Adjust for actual data start. 697 asm.emitString("add_u64 " + tmpReg + ", " + tmpReg + ", " + arrayElementsOffset + "; // Adjust for actual elements data start"); 698 // Add to array ref ptr. 699 asm.emitString("add_u64 " + tmpReg + ", " + tmpReg + ", " + iterationObjArgReg + "; // Add to array ref ptr"); 700 // Load the object into the parameter reg. 701 if (useCompressedOops) { 702 703 // Load u32 into the d 64 reg since it will become an object address 704 asm.emitString("ld_global_u32 " + tmpReg + ", " + "[" + tmpReg + "]" + "; // Load compressed ptr from array"); 705 706 long narrowOopBase = config.narrowOopBase; 707 long narrowOopShift = config.narrowOopShift; 708 709 if (narrowOopBase == 0 && narrowOopShift == 0) { 710 // No more calculation to do, mov to target register 711 asm.emitString("mov_b64 " + iterationObjArgReg + ", " + tmpReg + "; // no shift or base addition"); 712 } else { 713 if (narrowOopBase == 0) { 714 asm.emitString("shl_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + narrowOopShift + "; // do narrowOopShift"); 715 } else if (narrowOopShift == 0) { 716 // not sure if we ever get add with 0 shift but just in case 717 asm.emitString("cmp_eq_b1_u64 $c0, " + tmpReg + ", 0x0; // avoid add if compressed is null"); 718 asm.emitString("add_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + narrowOopBase + "; // add narrowOopBase"); 719 asm.emitString("cmov_b64 " + iterationObjArgReg + ", $c0, 0x0, " + iterationObjArgReg + "; // avoid add if compressed is null"); 720 } else { 721 asm.emitString("cmp_eq_b1_u64 $c0, " + tmpReg + ", 0x0; // avoid shift-add if compressed is null"); 722 asm.emitString("mad_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + (1 << narrowOopShift) + ", " + narrowOopBase + "; // shift and add narrowOopBase"); 723 asm.emitString("cmov_b64 " + iterationObjArgReg + ", $c0, 0x0, " + iterationObjArgReg + "; // avoid shift-add if compressed is null"); 724 } 725 } 726 727 } else { 728 asm.emitString("ld_global_u64 " + iterationObjArgReg + ", " + "[" + tmpReg + "]" + "; // Load from array element into parameter reg"); 729 } 730 } 731 // Prologue done, Emit code for the LIR. 732 crb.emit(lir); 733 // Now that code is emitted go back and figure out what the upper Bound stack size was. 734 long maxStackSize = ((HSAILAssembler) crb.asm).upperBoundStackSize(); 735 String spillsegStringFinal; 736 if (maxStackSize == 0) { 737 // If no spilling, get rid of spillseg declaration. 738 char[] array = new char[spillsegTemplate.length()]; 739 Arrays.fill(array, ' '); 740 spillsegStringFinal = new String(array); 741 } else { 742 spillsegStringFinal = spillsegTemplate.replace("123456", String.format("%6d", maxStackSize)); 743 } 744 asm.emitString(spillsegStringFinal, spillsegDeclarationPosition); 745 // Emit the epilogue. 746 747 HSAILHotSpotLIRGenerationResult lirGenRes = ((HSAILCompilationResultBuilder) crb).lirGenRes; 748 749 int numSRegs = 0; 750 int numDRegs = 0; 751 int numStackSlotBytes = 0; 752 if (useHSAILDeoptimization) { 753 /* 754 * Get the union of registers and stack slots needed to be saved at the infopoints. 755 * While doing this compute the highest register in each category. 756 */ 757 HSAILHotSpotRegisterConfig hsailRegConfig = (HSAILHotSpotRegisterConfig) regConfig; 758 Set<Register> infoUsedRegs = new TreeSet<>(); 759 Set<StackSlot> infoUsedStackSlots = new HashSet<>(); 760 List<Infopoint> infoList = crb.compilationResult.getInfopoints(); 761 Queue<Value[]> workList = new LinkedList<>(); 762 for (Infopoint info : infoList) { 763 BytecodeFrame frame = info.debugInfo.frame(); 764 while (frame != null) { 765 workList.add(frame.values); 766 frame = frame.caller(); 767 } 768 } 769 while (!workList.isEmpty()) { 770 Value[] values = workList.poll(); 771 for (Value val : values) { 772 if (isLegal(val)) { 773 if (isRegister(val)) { 774 Register reg = asRegister(val); 775 infoUsedRegs.add(reg); 776 if (hsailRegConfig.isAllocatableSReg(reg)) { 777 numSRegs = Math.max(numSRegs, reg.encoding + 1); 778 } else if (hsailRegConfig.isAllocatableDReg(reg)) { 779 numDRegs = Math.max(numDRegs, reg.encoding + 1); 780 } 781 } else if (isStackSlot(val)) { 782 StackSlot slot = asStackSlot(val); 783 Kind slotKind = slot.getKind(); 784 int slotSizeBytes = (slotKind.isObject() ? 8 : slotKind.getByteCount()); 785 int slotOffsetMax = HSAIL.getStackOffsetStart(slot, slotSizeBytes * 8) + slotSizeBytes; 786 numStackSlotBytes = Math.max(numStackSlotBytes, slotOffsetMax); 787 infoUsedStackSlots.add(slot); 788 } else if (isVirtualObject(val)) { 789 workList.add(((VirtualObject) val).getValues()); 790 } else { 791 assert isConstant(val) : "Unsupported value: " + val; 792 } 793 } 794 } 795 } 796 797 // round up numSRegs to even number so dregs start on aligned boundary 798 numSRegs += (numSRegs & 1); 799 800 // numStackSlots is the number of 8-byte locations used for stack variables 801 int numStackSlots = (numStackSlotBytes + 7) / 8; 802 803 final int offsetToDeoptSaveStates = config.hsailDeoptimizationInfoHeaderSize; 804 final int bytesPerSaveArea = 4 * numSRegs + 8 * numDRegs + 8 * numStackSlots; 805 final int sizeofKernelDeopt = config.hsailKernelDeoptimizationHeaderSize + config.hsailFrameHeaderSize + bytesPerSaveArea; 806 final int offsetToNeverRanArray = config.hsailNeverRanArrayOffset; 807 final int offsetToDeoptNextIndex = config.hsailDeoptNextIndexOffset; 808 final int offsetToDeoptimizationWorkItem = config.hsailDeoptimizationWorkItem; 809 final int offsetToDeoptimizationReason = config.hsailDeoptimizationReason; 810 final int offsetToDeoptimizationFrame = config.hsailKernelDeoptimizationHeaderSize; 811 final int offsetToFramePc = config.hsailFramePcOffset; 812 final int offsetToNumSaves = config.hsailFrameNumSRegOffset; 813 final int offsetToSaveArea = config.hsailFrameHeaderSize; 814 815 AllocatableValue scratch64 = HSAIL.d16.asValue(wordLIRKind); 816 AllocatableValue cuSaveAreaPtr = HSAIL.d17.asValue(wordLIRKind); 817 AllocatableValue waveMathScratch1 = HSAIL.d18.asValue(wordLIRKind); 818 AllocatableValue waveMathScratch2 = HSAIL.d19.asValue(wordLIRKind); 819 820 AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(LIRKind.value(Kind.Int)); 821 AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(LIRKind.value(Kind.Int)); 822 AllocatableValue scratch32 = HSAIL.s34.asValue(LIRKind.value(Kind.Int)); 823 AllocatableValue workidreg = HSAIL.s35.asValue(LIRKind.value(Kind.Int)); 824 825 HSAILAddress deoptNextIndexAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToDeoptNextIndex).toAddress(); 826 HSAILAddress neverRanArrayAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToNeverRanArray).toAddress(); 827 828 // The just-started lanes that see the deopt flag will jump here 829 asm.emitString0(deoptInProgressLabel + ":\n"); 830 asm.emitLoad(wordKind, waveMathScratch1, neverRanArrayAddr); 831 asm.emitWorkItemAbsId(workidreg); 832 asm.emitConvert(waveMathScratch2, workidreg, wordKind, Kind.Int); 833 asm.emit("add", waveMathScratch1, waveMathScratch1, waveMathScratch2); 834 HSAILAddress neverRanStoreAddr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, 0).toAddress(); 835 asm.emitStore(Kind.Byte, Constant.forInt(1), neverRanStoreAddr); 836 asm.emitString("ret;"); 837 838 // The deoptimizing lanes will jump here 839 asm.emitString0(asm.getDeoptLabelName() + ":\n"); 840 String labelExit = asm.getDeoptLabelName() + "_Exit"; 841 842 HSAILAddress deoptInfoAddr = new HSAILAddressValue(wordLIRKind, scratch64, config.hsailDeoptOccurredOffset).toAddress(); 843 asm.emitLoadKernelArg(scratch64, asm.getDeoptInfoName(), "u64"); 844 845 // Set deopt occurred flag 846 asm.emitMov(Kind.Int, scratch32, Constant.forInt(1)); 847 asm.emitStoreRelease(scratch32, deoptInfoAddr); 848 849 asm.emitComment("// Determine next deopt save slot"); 850 asm.emitAtomicAdd(scratch32, deoptNextIndexAddr, Constant.forInt(1)); 851 /* 852 * scratch32 now holds next index to use set error condition if no room in save area 853 */ 854 asm.emitComment("// assert room to save deopt"); 855 asm.emitCompare(Kind.Int, scratch32, Constant.forInt(maxDeoptIndex), "lt", false, false); 856 asm.cbr("@L_StoreDeopt"); 857 /* 858 * if assert fails, store a guaranteed negative workitemid in top level deopt occurred 859 * flag 860 */ 861 asm.emitWorkItemAbsId(scratch32); 862 asm.emit("mad", scratch32, scratch32, Constant.forInt(-1), Constant.forInt(-1)); 863 asm.emitStore(scratch32, deoptInfoAddr); 864 asm.emitString("ret;"); 865 866 asm.emitString0("@L_StoreDeopt" + ":\n"); 867 868 // Store deopt for this workitem into its slot in the HSAILComputeUnitSaveStates array 869 870 asm.emitComment("// Convert id's for ptr math"); 871 asm.emitConvert(cuSaveAreaPtr, scratch32, wordKind, Kind.Int); 872 asm.emitComment("// multiply by sizeof KernelDeoptArea"); 873 asm.emit("mul", cuSaveAreaPtr, cuSaveAreaPtr, Constant.forInt(sizeofKernelDeopt)); 874 asm.emitComment("// Add computed offset to deoptInfoPtr base"); 875 asm.emit("add", cuSaveAreaPtr, cuSaveAreaPtr, scratch64); 876 // Add offset to _deopt_save_states[0] 877 asm.emit("add", scratch64, cuSaveAreaPtr, Constant.forInt(offsetToDeoptSaveStates)); 878 879 HSAILAddress workItemAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToDeoptimizationWorkItem).toAddress(); 880 HSAILAddress actionReasonStoreAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToDeoptimizationReason).toAddress(); 881 882 asm.emitComment("// Get _deopt_info._first_frame"); 883 asm.emit("add", waveMathScratch1, scratch64, Constant.forInt(offsetToDeoptimizationFrame)); 884 // Now scratch64 is the _deopt_info._first_frame 885 HSAILAddress pcStoreAddr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, offsetToFramePc).toAddress(); 886 HSAILAddress regCountsAddr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, offsetToNumSaves).toAddress(); 887 asm.emitComment("// store deopting workitem"); 888 asm.emitWorkItemAbsId(scratch32); 889 asm.emitStore(Kind.Int, scratch32, workItemAddr); 890 asm.emitComment("// store actionAndReason"); 891 asm.emitStore(Kind.Int, actionAndReasonReg, actionReasonStoreAddr); 892 asm.emitComment("// store PC"); 893 asm.emitStore(Kind.Int, codeBufferOffsetReg, pcStoreAddr); 894 895 asm.emitComment("// store regCounts (" + numSRegs + " $s registers, " + numDRegs + " $d registers, " + numStackSlots + " stack slots)"); 896 asm.emitStore(Kind.Int, Constant.forInt(numSRegs + (numDRegs << 8) + (numStackSlots << 16)), regCountsAddr); 897 898 /* 899 * Loop thru the usedValues storing each of the registers that are used. We always store 900 * in a fixed location, even if some registers are skipped. 901 */ 902 asm.emitComment("// store used regs"); 903 for (Register reg : infoUsedRegs) { 904 if (hsailRegConfig.isAllocatableSReg(reg)) { 905 // 32 bit registers 906 Kind kind = Kind.Int; 907 int ofst = offsetToSaveArea + reg.encoding * 4; 908 HSAILAddress addr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, ofst).toAddress(); 909 AllocatableValue regValue = reg.asValue(LIRKind.value(kind)); 910 asm.emitStore(kind, regValue, addr); 911 } else if (hsailRegConfig.isAllocatableDReg(reg)) { 912 // 64 bit registers 913 Kind kind = Kind.Long; 914 // d reg ofst starts past the 32 sregs 915 int ofst = offsetToSaveArea + (numSRegs * 4) + reg.encoding * 8; 916 HSAILAddress addr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, ofst).toAddress(); 917 AllocatableValue regValue = reg.asValue(LIRKind.value(kind)); 918 asm.emitStore(kind, regValue, addr); 919 } else { 920 throw GraalInternalError.unimplemented(); 921 } 922 } 923 924 // loop thru the usedStackSlots creating instructions to save in the save area 925 if (numStackSlotBytes > 0) { 926 asm.emitComment("// store stack slots (uses " + numStackSlotBytes + " bytes)"); 927 for (StackSlot slot : infoUsedStackSlots) { 928 asm.emitComment("// store " + slot); 929 Kind kind = slot.getKind(); 930 int sizeInBits = (kind.isObject() || kind.getByteCount() == 8 ? 64 : 32); 931 int ofst = offsetToSaveArea + (numSRegs * 4) + (numDRegs * 8) + HSAIL.getStackOffsetStart(slot, sizeInBits); 932 HSAILAddress addr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, ofst).toAddress(); 933 if (sizeInBits == 64) { 934 asm.emitSpillLoad(kind, scratch64, slot); 935 asm.emitStore(kind, scratch64, addr); 936 } else { 937 asm.emitSpillLoad(kind, scratch32, slot); 938 asm.emitStore(kind, scratch32, addr); 939 } 940 } 941 } 942 943 asm.emitString0(labelExit + ":\n"); 944 945 // and emit the return 946 crb.frameContext.leave(crb); 947 asm.exit(); 948 // build the oopMap Array 949 int[] oopMapArray = new OopMapArrayBuilder().build(infoList, numSRegs, numDRegs, numStackSlots, hsailRegConfig); 950 ((ExternalCompilationResult) crb.compilationResult).setOopMapArray(oopMapArray); 951 } else { 952 // Deoptimization is explicitly off, so emit simple return 953 asm.emitString0(asm.getDeoptLabelName() + ":\n"); 954 asm.emitComment("// No deoptimization"); 955 asm.emitString("ret;"); 956 } 957 958 asm.emitString0("}; \n"); 959 960 ExternalCompilationResult compilationResult = (ExternalCompilationResult) crb.compilationResult; 961 if (useHSAILDeoptimization) { 962 compilationResult.setHostGraph(prepareHostGraph(method, lirGenRes.getDeopts(), getProviders(), config, numSRegs, numDRegs)); 963 } 964 } 965 966 private static class OopMapArrayBuilder { 967 // oopMapArray struct 968 // int bytesPerSaveArea; (not strictly part of oopsmap but convenient to put here) 969 // int intsPerInfopoint; 970 static final int SAVEAREACOUNTS_OFST = 0; 971 static final int INTSPERINFOPOINT_OFST = 1; 972 static final int HEADERSIZE = 2; 973 // for each infopoint: 974 // int deoptId 975 // one or more ints of bits for the oopmap 976 977 private int[] array; 978 private int intsPerInfopoint; 979 980 int[] build(List<Infopoint> infoList, int numSRegs, int numDRegs, int numStackSlots, HSAILHotSpotRegisterConfig hsailRegConfig) { 981 /* 982 * We are told that infoList is always sorted. Each infoPoint can have a different 983 * oopMap. Since numStackSlots is the number of 8-byte stack slots used, it is an upper 984 * limit on the number of oop stack slots 985 */ 986 int bitsPerInfopoint = numDRegs + numStackSlots; 987 int intsForBits = (bitsPerInfopoint + 31) / 32; 988 int numInfopoints = infoList.size(); 989 intsPerInfopoint = intsForBits + 1; // +1 for the pcoffset 990 int arraySize = HEADERSIZE + (numInfopoints * intsPerInfopoint); 991 array = new int[arraySize]; 992 array[INTSPERINFOPOINT_OFST] = intsPerInfopoint; 993 // compute saveAreaCounts 994 int saveAreaCounts = (numSRegs & 0xff) + (numDRegs << 8) + (numStackSlots << 16); 995 array[SAVEAREACOUNTS_OFST] = saveAreaCounts; 996 997 // loop thru the infoList 998 int infoIndex = 0; 999 for (Infopoint info : infoList) { 1000 setOopMapPcOffset(infoIndex, info.pcOffset); 1001 BytecodeFrame frame = info.debugInfo.frame(); 1002 while (frame != null) { 1003 for (int i = 0; i < frame.numLocals + frame.numStack; i++) { 1004 Value val = frame.values[i]; 1005 if (isLegal(val)) { 1006 if (isRegister(val)) { 1007 Register reg = asRegister(val); 1008 if (val.getKind().isObject()) { 1009 assert (hsailRegConfig.isAllocatableDReg(reg)); 1010 int bitIndex = reg.encoding(); 1011 setOopMapBit(infoIndex, bitIndex); 1012 } 1013 } else if (isStackSlot(val)) { 1014 StackSlot slot = asStackSlot(val); 1015 if (val.getKind().isObject()) { 1016 assert (HSAIL.getStackOffsetStart(slot, 64) % 8 == 0); 1017 int bitIndex = numDRegs + HSAIL.getStackOffsetStart(slot, 64) / 8; 1018 setOopMapBit(infoIndex, bitIndex); 1019 } 1020 } 1021 } 1022 } 1023 frame = frame.caller(); 1024 } 1025 infoIndex++; 1026 } 1027 try (Scope s = Debug.scope("CodeGen")) { 1028 if (Debug.isLogEnabled()) { 1029 Debug.log("numSRegs=%d, numDRegs=%d, numStackSlots=%d", numSRegs, numDRegs, numStackSlots); 1030 // show infopoint oopmap details 1031 for (infoIndex = 0; infoIndex < infoList.size(); infoIndex++) { 1032 String infoString = "Infopoint " + infoIndex + ", pcOffset=" + getOopMapPcOffset(infoIndex) + ", oopmap="; 1033 for (int i = 0; i < intsForBits; i++) { 1034 infoString += (i != 0 ? ", " : "") + Integer.toHexString(getOopMapBitsAsInt(infoIndex, i)); 1035 } 1036 Debug.log(infoString); 1037 } 1038 } 1039 } catch (Throwable e) { 1040 throw Debug.handle(e); 1041 } 1042 1043 return array; 1044 } 1045 1046 private void setOopMapPcOffset(int infoIndex, int pcOffset) { 1047 int arrIndex = HEADERSIZE + infoIndex * intsPerInfopoint; 1048 array[arrIndex] = pcOffset; 1049 } 1050 1051 private int getOopMapPcOffset(int infoIndex) { 1052 int arrIndex = HEADERSIZE + infoIndex * intsPerInfopoint; 1053 return array[arrIndex]; 1054 } 1055 1056 private void setOopMapBit(int infoIndex, int bitIndex) { 1057 int arrIndex = HEADERSIZE + infoIndex * intsPerInfopoint + 1 + bitIndex / 32; 1058 array[arrIndex] |= (1 << (bitIndex % 32)); 1059 } 1060 1061 private int getOopMapBitsAsInt(int infoIndex, int intIndex) { 1062 int arrIndex = HEADERSIZE + infoIndex * intsPerInfopoint + 1 + intIndex; 1063 return array[arrIndex]; 1064 } 1065 } 1066 1067 private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizingOp> deopts, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs) { 1068 if (deopts.isEmpty()) { 1069 return null; 1070 } 1071 StructuredGraph hostGraph = new StructuredGraph(method, -2); 1072 ParameterNode deoptId = hostGraph.unique(new ParameterNode(0, StampFactory.intValue())); 1073 ParameterNode hsailFrame = hostGraph.unique(new ParameterNode(1, StampFactory.forKind(providers.getCodeCache().getTarget().wordKind))); 1074 ParameterNode reasonAndAction = hostGraph.unique(new ParameterNode(2, StampFactory.intValue())); 1075 ParameterNode speculation = hostGraph.unique(new ParameterNode(3, StampFactory.object())); 1076 BeginNode[] branches = new BeginNode[deopts.size() + 1]; 1077 int[] keys = new int[deopts.size()]; 1078 int[] keySuccessors = new int[deopts.size() + 1]; 1079 double[] keyProbabilities = new double[deopts.size() + 1]; 1080 int i = 0; 1081 Collections.sort(deopts, new Comparator<DeoptimizingOp>() { 1082 public int compare(DeoptimizingOp o1, DeoptimizingOp o2) { 1083 return o1.getCodeBufferPos() - o2.getCodeBufferPos(); 1084 } 1085 }); 1086 for (DeoptimizingOp deopt : deopts) { 1087 keySuccessors[i] = i; 1088 keyProbabilities[i] = 1.0 / deopts.size(); 1089 keys[i] = deopt.getCodeBufferPos(); 1090 assert keys[i] >= 0; 1091 branches[i] = createHostDeoptBranch(deopt, hsailFrame, reasonAndAction, speculation, providers, config, numSRegs, numDRegs); 1092 1093 i++; 1094 } 1095 keyProbabilities[deopts.size()] = 0; // default 1096 keySuccessors[deopts.size()] = deopts.size(); 1097 branches[deopts.size()] = createHostCrashBranch(hostGraph, deoptId); 1098 IntegerSwitchNode switchNode = hostGraph.add(new IntegerSwitchNode(deoptId, branches, keys, keyProbabilities, keySuccessors)); 1099 StartNode start = hostGraph.start(); 1100 start.setNext(switchNode); 1101 /* 1102 * printf.setNext(printf2); printf2.setNext(switchNode); 1103 */ 1104 hostGraph.setGuardsStage(GuardsStage.AFTER_FSA); 1105 return hostGraph; 1106 } 1107 1108 private static BeginNode createHostCrashBranch(StructuredGraph hostGraph, ValueNode deoptId) { 1109 VMErrorNode vmError = hostGraph.add(new VMErrorNode("Error in HSAIL deopt. DeoptId=%d", deoptId)); 1110 // ConvertNode.convert(hostGraph, Kind.Long, deoptId))); 1111 vmError.setNext(hostGraph.add(new ReturnNode(ConstantNode.defaultForKind(hostGraph.method().getSignature().getReturnKind(), hostGraph)))); 1112 return BeginNode.begin(vmError); 1113 } 1114 1115 private static BeginNode createHostDeoptBranch(DeoptimizingOp deopt, ParameterNode hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers, 1116 HotSpotVMConfig config, int numSRegs, int numDRegs) { 1117 BeginNode branch = hsailFrame.graph().add(new BeginNode()); 1118 DynamicDeoptimizeNode deoptimization = hsailFrame.graph().add(new DynamicDeoptimizeNode(reasonAndAction, speculation)); 1119 deoptimization.setStateBefore(createFrameState(deopt.getFrameState().topFrame, hsailFrame, providers, config, numSRegs, numDRegs)); 1120 branch.setNext(deoptimization); 1121 return branch; 1122 } 1123 1124 private static FrameState createFrameState(BytecodeFrame lowLevelFrame, ParameterNode hsailFrame, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs) { 1125 return createFrameState(lowLevelFrame, hsailFrame, providers, config, numSRegs, numDRegs, new HashMap<VirtualObject, VirtualObjectNode>()); 1126 } 1127 1128 private static FrameState createFrameState(BytecodeFrame lowLevelFrame, ParameterNode hsailFrame, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs, 1129 Map<VirtualObject, VirtualObjectNode> virtualObjects) { 1130 FrameState outterFrameState = null; 1131 if (lowLevelFrame.caller() != null) { 1132 outterFrameState = createFrameState(lowLevelFrame.caller(), hsailFrame, providers, config, numSRegs, numDRegs, virtualObjects); 1133 } 1134 StructuredGraph hostGraph = hsailFrame.graph(); 1135 Function<? super Value, ? extends ValueNode> lirValueToHirNode = v -> getNodeForValueFromFrame(v, hsailFrame, hostGraph, providers, config, numSRegs, numDRegs, virtualObjects); 1136 ValueNode[] locals = new ValueNode[lowLevelFrame.numLocals]; 1137 for (int i = 0; i < lowLevelFrame.numLocals; i++) { 1138 locals[i] = lirValueToHirNode.apply(lowLevelFrame.getLocalValue(i)); 1139 } 1140 List<ValueNode> stack = new ArrayList<>(lowLevelFrame.numStack); 1141 for (int i = 0; i < lowLevelFrame.numStack; i++) { 1142 stack.add(lirValueToHirNode.apply(lowLevelFrame.getStackValue(i))); 1143 } 1144 ValueNode[] locks = new ValueNode[lowLevelFrame.numLocks]; 1145 MonitorIdNode[] monitorIds = new MonitorIdNode[lowLevelFrame.numLocks]; 1146 for (int i = 0; i < lowLevelFrame.numLocks; i++) { 1147 HotSpotMonitorValue lockValue = (HotSpotMonitorValue) lowLevelFrame.getLockValue(i); 1148 locks[i] = lirValueToHirNode.apply(lockValue); 1149 monitorIds[i] = getMonitorIdForHotSpotMonitorValueFromFrame(lockValue, hsailFrame, hostGraph); 1150 } 1151 FrameState frameState = hostGraph.add(new FrameState(lowLevelFrame.getMethod(), lowLevelFrame.getBCI(), locals, stack, locks, monitorIds, lowLevelFrame.rethrowException, false)); 1152 if (outterFrameState != null) { 1153 frameState.setOuterFrameState(outterFrameState); 1154 } 1155 Map<VirtualObject, VirtualObjectNode> virtualObjectsCopy; 1156 // TODO this could be implemented more efficiently with a mark into the map 1157 // unfortunately LinkedHashMap doesn't seem to provide that. 1158 List<VirtualObjectState> virtualStates = new ArrayList<>(virtualObjects.size()); 1159 do { 1160 virtualObjectsCopy = new HashMap<>(virtualObjects); 1161 virtualStates.clear(); 1162 for (Entry<VirtualObject, VirtualObjectNode> entry : virtualObjectsCopy.entrySet()) { 1163 VirtualObject virtualObject = entry.getKey(); 1164 VirtualObjectNode virtualObjectNode = entry.getValue(); 1165 List<ValueNode> fieldValues = Arrays.stream(virtualObject.getValues()).map(lirValueToHirNode).collect(Collectors.toList()); 1166 virtualStates.add(new VirtualObjectState(virtualObjectNode, fieldValues)); 1167 } 1168 // New virtual objects may have been discovered while processing the previous set. 1169 // Wait until a fixed point is reached 1170 } while (virtualObjectsCopy.size() < virtualObjects.size()); 1171 virtualStates.forEach(vos -> frameState.addVirtualObjectMapping(hostGraph.unique(vos))); 1172 return frameState; 1173 } 1174 1175 @SuppressWarnings("unused") 1176 private static MonitorIdNode getMonitorIdForHotSpotMonitorValueFromFrame(HotSpotMonitorValue lockValue, ParameterNode hsailFrame, StructuredGraph hsailGraph) { 1177 if (lockValue.isEliminated()) { 1178 return null; 1179 } 1180 throw GraalInternalError.unimplemented(); 1181 } 1182 1183 private static ValueNode getNodeForValueFromFrame(Value localValue, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, 1184 int numDRegs, Map<VirtualObject, VirtualObjectNode> virtualObjects) { 1185 ValueNode valueNode; 1186 if (localValue instanceof Constant) { 1187 valueNode = ConstantNode.forConstant((Constant) localValue, providers.getMetaAccess(), hostGraph); 1188 } else if (localValue instanceof VirtualObject) { 1189 valueNode = getNodeForVirtualObjectFromFrame((VirtualObject) localValue, virtualObjects, hostGraph); 1190 } else if (localValue instanceof StackSlot) { 1191 StackSlot slot = (StackSlot) localValue; 1192 valueNode = getNodeForStackSlotFromFrame(slot, localValue.getKind(), hsailFrame, hostGraph, providers, config, numSRegs, numDRegs); 1193 } else if (localValue instanceof HotSpotMonitorValue) { 1194 HotSpotMonitorValue hotSpotMonitorValue = (HotSpotMonitorValue) localValue; 1195 return getNodeForValueFromFrame(hotSpotMonitorValue.getOwner(), hsailFrame, hostGraph, providers, config, numSRegs, numDRegs, virtualObjects); 1196 } else if (localValue instanceof RegisterValue) { 1197 RegisterValue registerValue = (RegisterValue) localValue; 1198 int regNumber = registerValue.getRegister().number; 1199 valueNode = getNodeForRegisterFromFrame(regNumber, localValue.getKind(), hsailFrame, hostGraph, providers, config, numSRegs); 1200 } else if (Value.ILLEGAL.equals(localValue)) { 1201 valueNode = null; 1202 } else { 1203 throw GraalInternalError.shouldNotReachHere(); 1204 } 1205 return valueNode; 1206 } 1207 1208 private static ValueNode getNodeForVirtualObjectFromFrame(VirtualObject virtualObject, Map<VirtualObject, VirtualObjectNode> virtualObjects, StructuredGraph hostGraph) { 1209 return virtualObjects.computeIfAbsent(virtualObject, vo -> { 1210 if (vo.getType().isArray()) { 1211 return hostGraph.add(new VirtualArrayNode(vo.getType().getComponentType(), vo.getValues().length)); 1212 } else { 1213 return hostGraph.add(new VirtualInstanceNode(vo.getType(), true)); 1214 } 1215 }); 1216 } 1217 1218 private static ValueNode getNodeForRegisterFromFrame(int regNumber, Kind valueKind, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config, 1219 int numSRegs) { 1220 ValueNode valueNode; 1221 LocationNode location; 1222 int longSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Long); 1223 int intSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Int); 1224 if (regNumber >= HSAIL.s0.number && regNumber <= HSAIL.s31.number) { 1225 long offset = config.hsailFrameHeaderSize + intSize * (regNumber - HSAIL.s0.number); 1226 location = ConstantLocationNode.create(FINAL_LOCATION, valueKind, offset, hostGraph); 1227 } else if (regNumber >= HSAIL.d0.number && regNumber <= HSAIL.d15.number) { 1228 long offset = config.hsailFrameHeaderSize + intSize * numSRegs + longSize * (regNumber - HSAIL.d0.number); 1229 location = ConstantLocationNode.create(FINAL_LOCATION, valueKind, offset, hostGraph); 1230 } else { 1231 throw GraalInternalError.shouldNotReachHere("unknown hsail register: " + regNumber); 1232 } 1233 valueNode = hostGraph.unique(new FloatingReadNode(hsailFrame, location, null, StampFactory.forKind(valueKind))); 1234 return valueNode; 1235 } 1236 1237 private static ValueNode getNodeForStackSlotFromFrame(StackSlot slot, Kind valueKind, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config, 1238 int numSRegs, int numDRegs) { 1239 int slotSizeInBits = (valueKind == Kind.Object ? 64 : valueKind.getByteCount() * 8); 1240 if ((slotSizeInBits == 32) || (slotSizeInBits == 64)) { 1241 int longSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Long); 1242 int intSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Int); 1243 long offset = config.hsailFrameHeaderSize + (intSize * numSRegs) + (longSize * numDRegs) + HSAIL.getStackOffsetStart(slot, slotSizeInBits); 1244 LocationNode location = ConstantLocationNode.create(FINAL_LOCATION, valueKind, offset, hostGraph); 1245 ValueNode valueNode = hostGraph.unique(new FloatingReadNode(hsailFrame, location, null, StampFactory.forKind(valueKind))); 1246 return valueNode; 1247 } else { 1248 throw GraalInternalError.shouldNotReachHere("unsupported stack slot kind: " + valueKind); 1249 } 1250 } 1251 }