1 /* 2 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package com.oracle.graal.hotspot.hsail.replacements; 24 25 import static com.oracle.graal.api.code.UnsignedMath.*; 26 import static com.oracle.graal.compiler.common.GraalOptions.*; 27 import static com.oracle.graal.hotspot.hsail.replacements.HSAILHotSpotReplacementsUtil.*; 28 import static com.oracle.graal.hotspot.hsail.replacements.HSAILNewObjectSnippets.Options.*; 29 import static com.oracle.graal.nodes.PiArrayNode.*; 30 import static com.oracle.graal.nodes.extended.BranchProbabilityNode.*; 31 import static com.oracle.graal.replacements.SnippetTemplate.*; 32 33 import com.oracle.graal.api.code.*; 34 import com.oracle.graal.api.meta.*; 35 import com.oracle.graal.compiler.common.type.*; 36 import com.oracle.graal.debug.*; 37 import com.oracle.graal.hotspot.*; 38 import com.oracle.graal.hotspot.meta.*; 39 import com.oracle.graal.hotspot.replacements.*; 40 import com.oracle.graal.hotspot.stubs.*; 41 import com.oracle.graal.nodes.*; 42 import com.oracle.graal.nodes.java.*; 43 import com.oracle.graal.nodes.spi.*; 44 import com.oracle.graal.options.*; 45 import com.oracle.graal.replacements.*; 46 import com.oracle.graal.replacements.Snippet.ConstantParameter; 47 import com.oracle.graal.replacements.SnippetTemplate.AbstractTemplates; 48 import com.oracle.graal.replacements.SnippetTemplate.Arguments; 49 import com.oracle.graal.replacements.SnippetTemplate.SnippetInfo; 50 import com.oracle.graal.word.*; 51 52 /** 53 * HSAIL-specific Snippets used for implementing NEW and NEWARRAY. 54 */ 55 public class HSAILNewObjectSnippets extends NewObjectSnippets { 56 57 static public class Options { 58 59 // @formatter:off 60 @Option(help = "In HSAIL allocation, allow allocation from eden as fallback if TLAB is full") 61 static final OptionValue<Boolean> HsailUseEdenAllocate = new OptionValue<>(false); 62 63 @Option(help = "In HSAIL allocation, allow GPU to allocate a new tlab if TLAB is full") 64 static final OptionValue<Boolean> HsailNewTlabAllocate = new OptionValue<>(true); 65 66 @Option(help = "Estimate of number of bytes allocated by each HSAIL workitem, used to size TLABs") 67 static public final OptionValue<Integer> HsailAllocBytesPerWorkitem = new OptionValue<>(64); 68 69 // @formatter:on 70 } 71 72 private static final boolean hsailUseEdenAllocate = HsailUseEdenAllocate.getValue(); 73 private static final boolean hsailNewTlabAllocate = HsailNewTlabAllocate.getValue(); 74 75 protected static Word fillNewTlabInfoWithTlab(Word oldTlabInfo) { 76 Word allocInfo = readTlabInfoAllocInfo(oldTlabInfo); 77 Word newTlabInfo = atomicGetAndAddAllocInfoTlabInfosPoolNext(allocInfo, config().hsailTlabInfoSize); 78 Word tlabInfosPoolEnd = readAllocInfoTlabInfosPoolEnd(allocInfo); 79 if (newTlabInfo.aboveOrEqual(tlabInfosPoolEnd)) { 80 // could not get a new tlab info, mark zero and we will later deoptimize 81 return (Word.zero()); 82 } 83 84 // make new size depend on old tlab size 85 Word newTlabSize = readTlabInfoEnd(oldTlabInfo).subtract(readTlabInfoStart(oldTlabInfo)); 86 // try to allocate a new tlab 87 Word tlabStart = NewInstanceStub.edenAllocate(newTlabSize, false); 88 writeTlabInfoStart(newTlabInfo, tlabStart); // write this field even if zero 89 if (tlabStart.equal(0)) { 90 // could not get a new tlab, mark zero and we will later deoptimize 91 return (Word.zero()); 92 } 93 // here we have a new tlab and a tlabInfo, we can fill it in 94 writeTlabInfoTop(newTlabInfo, tlabStart); 95 writeTlabInfoOriginalTop(newTlabInfo, tlabStart); 96 // set end so that we leave space for the tlab "alignment reserve" 97 Word alignReserveBytes = readAllocInfoTlabAlignReserveBytes(allocInfo); 98 writeTlabInfoEnd(newTlabInfo, tlabStart.add(newTlabSize.subtract(alignReserveBytes))); 99 writeTlabInfoAllocInfo(newTlabInfo, allocInfo); 100 writeTlabInfoTlab(newTlabInfo, readTlabInfoTlab(oldTlabInfo)); 101 return (newTlabInfo); 102 } 103 104 protected static Word allocateFromTlabSlowPath(Word fastPathTlabInfo, int size, Word fastPathTop, Word fastPathEnd) { 105 // eventually this will be a separate call, not inlined 106 107 // we come here from the fastpath allocation 108 // here we know that the tlab has overflowed (top + size > end) 109 // find out if we are the first overflower 110 Word tlabInfo = fastPathTlabInfo; 111 Word top = fastPathTop; 112 Word end = fastPathEnd; 113 114 // start a loop where we try to get a new tlab and then try to allocate from it 115 // keep doing this until we run out of tlabs or tlabInfo structures 116 // initialize result with error return value 117 Word result = Word.zero(); 118 while (result.equal(Word.zero()) && tlabInfo.notEqual(Word.zero())) { 119 boolean firstOverflower = top.belowOrEqual(end); 120 if (firstOverflower) { 121 // store the last good top before overflow into last_good_top field 122 // we will move it back into top later when back in the VM 123 writeTlabInfoLastGoodTop(tlabInfo, top); 124 } 125 126 // if all this allocate tlab from gpu logic is disabled, 127 // just immediately set tlabInfo to 0 here 128 if (!hsailNewTlabAllocate) { 129 tlabInfo = Word.zero(); 130 } else { 131 // loop here waiting for the first overflower to get a new tlab 132 // note that on an hsa device we must be careful how we loop in order to ensure 133 // "forward progress". For example we must not break out of the loop. 134 Word oldTlabInfo = tlabInfo; 135 do { 136 if (firstOverflower) { 137 // allocate new tlabInfo and new tlab to fill it, returning 0 if any 138 // problems 139 // this will get all spinners out of this loop. 140 tlabInfo = fillNewTlabInfoWithTlab(oldTlabInfo); 141 writeTlabInfoPtrStoreRelease(tlabInfo); 142 } else { 143 tlabInfo = getTlabInfoPtrLoadAcquire(); 144 } 145 } while (tlabInfo.equal(oldTlabInfo)); 146 // when we get out of the loop if tlabInfoPtr contains 0, it means we 147 // can't get any more tlabs and will have to deoptimize 148 // otherwise, we have a valid new tlabInfo/tlab and can try to allocate again. 149 if (tlabInfo.notEqual(0)) { 150 top = atomicGetAndAddTlabInfoTop(tlabInfo, size); 151 end = readTlabInfoEnd(tlabInfo); 152 Word newTop = top.add(size); 153 if (probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) { 154 result = top; 155 } 156 } 157 } 158 } // while (result == 0) && (tlabInfo != 0)) 159 return result; 160 } 161 162 protected static Object addressToFormattedObject(Word addr, @ConstantParameter int size, Word hub, Word prototypeMarkWord, @ConstantParameter boolean fillContents, 163 @ConstantParameter String typeContext) { 164 Object result = formatObject(hub, size, addr, prototypeMarkWord, fillContents, true, true); 165 profileAllocation("instance", size, typeContext); 166 return piCast(verifyOop(result), StampFactory.forNodeIntrinsic()); 167 } 168 169 @Snippet 170 public static Object allocateInstanceAtomic(@ConstantParameter int size, Word hub, Word prototypeMarkWord, @ConstantParameter boolean fillContents, @ConstantParameter String typeContext) { 171 boolean haveResult = false; 172 if (useTLAB()) { 173 // inlining this manually here because it resulted in better fastpath codegen 174 Word tlabInfo = getTlabInfoPtr(); 175 if (probability(FAST_PATH_PROBABILITY, tlabInfo.notEqual(0))) { 176 Word top = atomicGetAndAddTlabInfoTop(tlabInfo, size); 177 Word end = readTlabInfoEnd(tlabInfo); 178 Word newTop = top.add(size); 179 if (probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) { 180 return addressToFormattedObject(top, size, hub, prototypeMarkWord, fillContents, typeContext); 181 } else { 182 Word addr = allocateFromTlabSlowPath(tlabInfo, size, top, end); 183 if (addr.notEqual(0)) { 184 return addressToFormattedObject(addr, size, hub, prototypeMarkWord, fillContents, typeContext); 185 } 186 } 187 } 188 } 189 190 // we could not allocate from tlab, try allocating directly from eden 191 if (hsailUseEdenAllocate) { 192 // false for no logging 193 Word addr = NewInstanceStub.edenAllocate(Word.unsigned(size), false); 194 if (addr.notEqual(0)) { 195 new_eden.inc(); 196 return addressToFormattedObject(addr, size, hub, prototypeMarkWord, fillContents, typeContext); 197 } 198 } 199 // haveResult test here helps avoid dropping earlier stores were seen to be dropped without 200 // this. 201 if (!haveResult) { 202 DeoptimizeNode.deopt(DeoptimizationAction.None, DeoptimizationReason.RuntimeConstraint); 203 } 204 // will never get here but this keeps the compiler happy 205 return Word.zero().toObject(); 206 } 207 208 @Snippet 209 public static Object allocateArrayAtomic(Word hub, int length, Word prototypeMarkWord, @ConstantParameter int headerSize, @ConstantParameter int log2ElementSize, 210 @ConstantParameter boolean fillContents, @ConstantParameter boolean maybeUnroll, @ConstantParameter String typeContext) { 211 if (!belowThan(length, MAX_ARRAY_FAST_PATH_ALLOCATION_LENGTH)) { 212 // This handles both negative array sizes and very large array sizes 213 DeoptimizeNode.deopt(DeoptimizationAction.None, DeoptimizationReason.RuntimeConstraint); 214 } 215 return allocateArrayAtomicImpl(hub, length, prototypeMarkWord, headerSize, log2ElementSize, fillContents, maybeUnroll, typeContext); 216 } 217 218 protected static Object addressToFormattedArray(Word addr, int allocationSize, int length, int headerSize, Word hub, Word prototypeMarkWord, boolean fillContents, boolean maybeUnroll, 219 @ConstantParameter String typeContext) { 220 // we are not in a stub so we can set useSnippetCounters to true 221 Object result = formatArray(hub, allocationSize, length, headerSize, addr, prototypeMarkWord, fillContents, maybeUnroll, true); 222 profileAllocation("array", allocationSize, typeContext); 223 return piArrayCast(verifyOop(result), length, StampFactory.forNodeIntrinsic()); 224 } 225 226 private static Object allocateArrayAtomicImpl(Word hub, int length, Word prototypeMarkWord, int headerSize, int log2ElementSize, boolean fillContents, boolean maybeUnroll, String typeContext) { 227 int alignment = wordSize(); 228 int allocationSize = computeArrayAllocationSize(length, alignment, headerSize, log2ElementSize); 229 boolean haveResult = false; 230 if (useTLAB()) { 231 // inlining this manually here because it resulted in better fastpath codegen 232 Word tlabInfo = getTlabInfoPtr(); 233 if (probability(FAST_PATH_PROBABILITY, tlabInfo.notEqual(0))) { 234 Word top = atomicGetAndAddTlabInfoTop(tlabInfo, allocationSize); 235 Word end = readTlabInfoEnd(tlabInfo); 236 Word newTop = top.add(allocationSize); 237 if (probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) { 238 return addressToFormattedArray(top, allocationSize, length, headerSize, hub, prototypeMarkWord, fillContents, maybeUnroll, typeContext); 239 } else { 240 Word addr = allocateFromTlabSlowPath(tlabInfo, allocationSize, top, end); 241 if (addr.notEqual(0)) { 242 return addressToFormattedArray(addr, allocationSize, length, headerSize, hub, prototypeMarkWord, fillContents, maybeUnroll, typeContext); 243 } 244 } 245 } 246 } 247 248 // we could not allocate from tlab, try allocating directly from eden 249 if (hsailUseEdenAllocate) { 250 // false for no logging 251 Word addr = NewInstanceStub.edenAllocate(Word.unsigned(allocationSize), false); 252 if (addr.notEqual(0)) { 253 newarray_eden.inc(); 254 return addressToFormattedArray(addr, allocationSize, length, headerSize, hub, prototypeMarkWord, fillContents, maybeUnroll, typeContext); 255 } 256 } 257 if (!haveResult) { 258 DeoptimizeNode.deopt(DeoptimizationAction.None, DeoptimizationReason.RuntimeConstraint); 259 } 260 // will never get here but this keeps the compiler happy 261 return Word.zero().toObject(); 262 } 263 264 public static class Templates extends AbstractTemplates { 265 266 private final SnippetInfo allocateInstance = snippet(HSAILNewObjectSnippets.class, "allocateInstanceAtomic"); 267 private final SnippetInfo allocateArray = snippet(HSAILNewObjectSnippets.class, "allocateArrayAtomic"); 268 269 // private final SnippetInfo allocateArrayDynamic = snippet(NewObjectSnippets.class, 270 // "allocateArrayDynamic"); 271 // private final SnippetInfo newmultiarray = snippet(NewObjectSnippets.class, 272 // "newmultiarray"); 273 274 public Templates(HotSpotProviders providers, TargetDescription target) { 275 super(providers, providers.getSnippetReflection(), target); 276 } 277 278 /** 279 * Lowers a {@link NewInstanceNode}. 280 */ 281 public void lower(NewInstanceNode newInstanceNode, LoweringTool tool) { 282 StructuredGraph graph = newInstanceNode.graph(); 283 HotSpotResolvedObjectType type = (HotSpotResolvedObjectType) newInstanceNode.instanceClass(); 284 assert !type.isArray(); 285 ConstantNode hub = ConstantNode.forConstant(type.klass(), providers.getMetaAccess(), graph); 286 int size = instanceSize(type); 287 288 Arguments args = new Arguments(allocateInstance, graph.getGuardsStage(), tool.getLoweringStage()); 289 args.addConst("size", size); 290 args.add("hub", hub); 291 args.add("prototypeMarkWord", type.prototypeMarkWord()); 292 args.addConst("fillContents", newInstanceNode.fillContents()); 293 args.addConst("typeContext", type.toJavaName(false)); 294 295 SnippetTemplate template = template(args); 296 Debug.log("Lowering allocateInstance in %s: node=%s, template=%s, arguments=%s", graph, newInstanceNode, template, args); 297 template.instantiate(providers.getMetaAccess(), newInstanceNode, DEFAULT_REPLACER, args); 298 } 299 300 /** 301 * Lowers a {@link NewArrayNode}. 302 */ 303 public void lower(NewArrayNode newArrayNode, LoweringTool tool) { 304 StructuredGraph graph = newArrayNode.graph(); 305 ResolvedJavaType elementType = newArrayNode.elementType(); 306 HotSpotResolvedObjectType arrayType = (HotSpotResolvedObjectType) elementType.getArrayClass(); 307 Kind elementKind = elementType.getKind(); 308 ConstantNode hub = ConstantNode.forConstant(arrayType.klass(), providers.getMetaAccess(), graph); 309 final int headerSize = HotSpotGraalRuntime.getArrayBaseOffset(elementKind); 310 // lowerer extends HotSpotLoweringProvider so we can just use that 311 HotSpotLoweringProvider lowerer = (HotSpotLoweringProvider) providers.getLowerer(); 312 int log2ElementSize = CodeUtil.log2(lowerer.arrayScalingFactor(elementKind)); 313 314 Arguments args = new Arguments(allocateArray, graph.getGuardsStage(), tool.getLoweringStage()); 315 args.add("hub", hub); 316 ValueNode length = newArrayNode.length(); 317 args.add("length", length.isAlive() ? length : graph.addOrUniqueWithInputs(length)); 318 args.add("prototypeMarkWord", arrayType.prototypeMarkWord()); 319 args.addConst("headerSize", headerSize); 320 args.addConst("log2ElementSize", log2ElementSize); 321 args.addConst("fillContents", newArrayNode.fillContents()); 322 args.addConst("maybeUnroll", length.isConstant()); 323 args.addConst("typeContext", arrayType.toJavaName(false)); 324 325 SnippetTemplate template = template(args); 326 Debug.log("Lowering allocateArray in %s: node=%s, template=%s, arguments=%s", graph, newArrayNode, template, args); 327 template.instantiate(providers.getMetaAccess(), newArrayNode, DEFAULT_REPLACER, args); 328 } 329 330 private static int instanceSize(HotSpotResolvedObjectType type) { 331 int size = type.instanceSize(); 332 assert (size % wordSize()) == 0; 333 assert size >= 0; 334 return size; 335 } 336 } 337 338 private static final SnippetCounter.Group countersNew = SnippetCounters.getValue() ? new SnippetCounter.Group("NewInstance") : null; 339 private static final SnippetCounter new_eden = new SnippetCounter(countersNew, "eden", "used edenAllocate"); 340 341 private static final SnippetCounter.Group countersNewArray = SnippetCounters.getValue() ? new SnippetCounter.Group("NewArray") : null; 342 // private static final SnippetCounter newarray_loopInit = new SnippetCounter(countersNewArray, 343 // "tlabLoopInit", "TLAB alloc with zeroing in a loop"); 344 private static final SnippetCounter newarray_eden = new SnippetCounter(countersNewArray, "eden", "used edenAllocate"); 345 }