1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package com.oracle.graal.hotspot.hsail.replacements;
  24 
  25 import static com.oracle.graal.api.code.UnsignedMath.*;
  26 import static com.oracle.graal.compiler.common.GraalOptions.*;
  27 import static com.oracle.graal.hotspot.hsail.replacements.HSAILHotSpotReplacementsUtil.*;
  28 import static com.oracle.graal.hotspot.hsail.replacements.HSAILNewObjectSnippets.Options.*;
  29 import static com.oracle.graal.nodes.PiArrayNode.*;
  30 import static com.oracle.graal.nodes.extended.BranchProbabilityNode.*;
  31 import static com.oracle.graal.replacements.SnippetTemplate.*;
  32 
  33 import com.oracle.graal.api.code.*;
  34 import com.oracle.graal.api.meta.*;
  35 import com.oracle.graal.compiler.common.type.*;
  36 import com.oracle.graal.debug.*;
  37 import com.oracle.graal.hotspot.*;
  38 import com.oracle.graal.hotspot.meta.*;
  39 import com.oracle.graal.hotspot.replacements.*;
  40 import com.oracle.graal.hotspot.stubs.*;
  41 import com.oracle.graal.nodes.*;
  42 import com.oracle.graal.nodes.java.*;
  43 import com.oracle.graal.nodes.spi.*;
  44 import com.oracle.graal.options.*;
  45 import com.oracle.graal.replacements.*;
  46 import com.oracle.graal.replacements.Snippet.ConstantParameter;
  47 import com.oracle.graal.replacements.SnippetTemplate.AbstractTemplates;
  48 import com.oracle.graal.replacements.SnippetTemplate.Arguments;
  49 import com.oracle.graal.replacements.SnippetTemplate.SnippetInfo;
  50 import com.oracle.graal.word.*;
  51 
  52 /**
  53  * HSAIL-specific Snippets used for implementing NEW and NEWARRAY.
  54  */
  55 public class HSAILNewObjectSnippets extends NewObjectSnippets {
  56 
  57     static public class Options {
  58 
  59         // @formatter:off
  60         @Option(help = "In HSAIL allocation, allow allocation from eden as fallback if TLAB is full")
  61         static final OptionValue<Boolean> HsailUseEdenAllocate = new OptionValue<>(false);
  62 
  63         @Option(help = "In HSAIL allocation, allow GPU to allocate a new tlab if TLAB is full")
  64         static final OptionValue<Boolean> HsailNewTlabAllocate = new OptionValue<>(true);
  65 
  66         @Option(help = "Estimate of number of bytes allocated by each HSAIL workitem, used to size TLABs")
  67         static public final OptionValue<Integer> HsailAllocBytesPerWorkitem = new OptionValue<>(64);
  68 
  69         // @formatter:on
  70     }
  71 
    // Option values are read once, when this class is initialized, and then used as plain
    // boolean constants by the allocation code below.
    private static final boolean hsailUseEdenAllocate = HsailUseEdenAllocate.getValue();
    private static final boolean hsailNewTlabAllocate = HsailNewTlabAllocate.getValue();
  74 
  75     protected static Word fillNewTlabInfoWithTlab(Word oldTlabInfo) {
  76         Word allocInfo = readTlabInfoAllocInfo(oldTlabInfo);
  77         Word newTlabInfo = atomicGetAndAddAllocInfoTlabInfosPoolNext(allocInfo, config().hsailTlabInfoSize);
  78         Word tlabInfosPoolEnd = readAllocInfoTlabInfosPoolEnd(allocInfo);
  79         if (newTlabInfo.aboveOrEqual(tlabInfosPoolEnd)) {
  80             // could not get a new tlab info, mark zero and we will later deoptimize
  81             return (Word.zero());
  82         }
  83 
  84         // make new size depend on old tlab size
  85         Word newTlabSize = readTlabInfoEnd(oldTlabInfo).subtract(readTlabInfoStart(oldTlabInfo));
  86         // try to allocate a new tlab
  87         Word tlabStart = NewInstanceStub.edenAllocate(newTlabSize, false);
  88         writeTlabInfoStart(newTlabInfo, tlabStart);  // write this field even if zero
  89         if (tlabStart.equal(0)) {
  90             // could not get a new tlab, mark zero and we will later deoptimize
  91             return (Word.zero());
  92         }
  93         // here we have a new tlab and a tlabInfo, we can fill it in
  94         writeTlabInfoTop(newTlabInfo, tlabStart);
  95         writeTlabInfoOriginalTop(newTlabInfo, tlabStart);
  96         // set end so that we leave space for the tlab "alignment reserve"
  97         Word alignReserveBytes = readAllocInfoTlabAlignReserveBytes(allocInfo);
  98         writeTlabInfoEnd(newTlabInfo, tlabStart.add(newTlabSize.subtract(alignReserveBytes)));
  99         writeTlabInfoAllocInfo(newTlabInfo, allocInfo);
 100         writeTlabInfoTlab(newTlabInfo, readTlabInfoTlab(oldTlabInfo));
 101         return (newTlabInfo);
 102     }
 103 
    /**
     * Slow path taken when the fast-path bump allocation overflowed the current TLAB
     * ({@code top + size > end}). Repeatedly tries to obtain a new tlabInfo/TLAB and to allocate
     * {@code size} bytes from it, until an allocation succeeds or no further TLAB can be had.
     *
     * @param fastPathTlabInfo the tlabInfo the fast path tried to allocate from
     * @param size number of bytes to allocate
     * @param fastPathTop the value the fast path's atomic add returned for the TLAB top
     * @param fastPathEnd the TLAB end the fast path compared against
     * @return the address of the allocated memory, or zero if allocation failed
     */
    protected static Word allocateFromTlabSlowPath(Word fastPathTlabInfo, int size, Word fastPathTop, Word fastPathEnd) {
        // eventually this will be a separate call, not inlined

        // we come here from the fastpath allocation
        // here we know that the tlab has overflowed (top + size > end)
        // find out if we are the first overflower
        Word tlabInfo = fastPathTlabInfo;
        Word top = fastPathTop;
        Word end = fastPathEnd;

        // start a loop where we try to get a new tlab and then try to allocate from it
        // keep doing this until we run out of tlabs or tlabInfo structures
        // initialize result with error return value
        Word result = Word.zero();
        while (result.equal(Word.zero()) && tlabInfo.notEqual(Word.zero())) {
            // the top we got back was still within the tlab, so it was our own request
            // that pushed the top past the end
            boolean firstOverflower = top.belowOrEqual(end);
            if (firstOverflower) {
                // store the last good top before overflow into last_good_top field
                // we will move it back into top later when back in the VM
                writeTlabInfoLastGoodTop(tlabInfo, top);
            }

            // if all this allocate tlab from gpu logic is disabled,
            // just immediately set tlabInfo to 0 here
            // (that ends the outer loop with result still zero)
            if (!hsailNewTlabAllocate) {
                tlabInfo = Word.zero();
            } else {
                // loop here waiting for the first overflower to get a new tlab
                // note that on an hsa device we must be careful how we loop in order to ensure
                // "forward progress". For example we must not break out of the loop.
                Word oldTlabInfo = tlabInfo;
                do {
                    if (firstOverflower) {
                        // allocate new tlabInfo and new tlab to fill it, returning 0 if any
                        // problems
                        // this will get all spinners out of this loop.
                        tlabInfo = fillNewTlabInfoWithTlab(oldTlabInfo);
                        writeTlabInfoPtrStoreRelease(tlabInfo);
                    } else {
                        // not the first overflower: re-read the tlabInfo pointer until it
                        // differs from the one we overflowed
                        tlabInfo = getTlabInfoPtrLoadAcquire();
                    }
                } while (tlabInfo.equal(oldTlabInfo));
                // when we get out of the loop if tlabInfoPtr contains 0, it means we
                // can't get any more tlabs and will have to deoptimize
                // otherwise, we have a valid new tlabInfo/tlab and can try to allocate again.
                if (tlabInfo.notEqual(0)) {
                    // same bump allocation as on the fast path, against the new tlab
                    top = atomicGetAndAddTlabInfoTop(tlabInfo, size);
                    end = readTlabInfoEnd(tlabInfo);
                    Word newTop = top.add(size);
                    if (probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) {
                        result = top;
                    }
                    // if the new tlab overflowed as well, result stays zero and the outer
                    // loop runs again with the updated top/end
                }
            }
        } // while (result == 0) && (tlabInfo != 0))
        return result;
    }
 161 
 162     protected static Object addressToFormattedObject(Word addr, @ConstantParameter int size, Word hub, Word prototypeMarkWord, @ConstantParameter boolean fillContents,
 163                     @ConstantParameter String typeContext) {
 164         Object result = formatObject(hub, size, addr, prototypeMarkWord, fillContents, true, true);
 165         profileAllocation("instance", size, typeContext);
 166         return piCast(verifyOop(result), StampFactory.forNodeIntrinsic());
 167     }
 168 
    /**
     * Snippet that allocates a new instance using atomic updates of the TLAB top. It tries, in
     * order: a bump allocation from the current tlabInfo's TLAB, the slow path that attempts to
     * obtain a new TLAB, and (only if {@code HsailUseEdenAllocate} is set) a direct eden
     * allocation. If none of these yields an address it deoptimizes.
     *
     * @param size size of the instance to allocate
     * @param hub hub (klass) word for the new object
     * @param prototypeMarkWord mark word for the new object
     * @param fillContents passed through to the object formatter
     * @param typeContext type name used for allocation profiling
     * @return the newly allocated and formatted object
     */
    @Snippet
    public static Object allocateInstanceAtomic(@ConstantParameter int size, Word hub, Word prototypeMarkWord, @ConstantParameter boolean fillContents, @ConstantParameter String typeContext) {
        // never set to true; see the comment at its use below
        boolean haveResult = false;
        if (useTLAB()) {
            // inlining this manually here because it resulted in better fastpath codegen
            Word tlabInfo = getTlabInfoPtr();
            if (probability(FAST_PATH_PROBABILITY, tlabInfo.notEqual(0))) {
                // atomically claim [top, top + size) and then check that it fits in the tlab
                Word top = atomicGetAndAddTlabInfoTop(tlabInfo, size);
                Word end = readTlabInfoEnd(tlabInfo);
                Word newTop = top.add(size);
                if (probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) {
                    return addressToFormattedObject(top, size, hub, prototypeMarkWord, fillContents, typeContext);
                } else {
                    // tlab overflowed: try to get a new tlab and allocate from that
                    Word addr = allocateFromTlabSlowPath(tlabInfo, size, top, end);
                    if (addr.notEqual(0)) {
                        return addressToFormattedObject(addr, size, hub, prototypeMarkWord, fillContents, typeContext);
                    }
                }
            }
        }

        // we could not allocate from tlab, try allocating directly from eden
        if (hsailUseEdenAllocate) {
            // false for no logging
            Word addr = NewInstanceStub.edenAllocate(Word.unsigned(size), false);
            if (addr.notEqual(0)) {
                new_eden.inc();
                return addressToFormattedObject(addr, size, hub, prototypeMarkWord, fillContents, typeContext);
            }
        }
        // The haveResult test here helps avoid dropping earlier stores: without this test,
        // earlier stores were seen to be dropped.
        if (!haveResult) {
            DeoptimizeNode.deopt(DeoptimizationAction.None, DeoptimizationReason.RuntimeConstraint);
        }
        // will never get here but this keeps the compiler happy
        return Word.zero().toObject();
    }
 207 
 208     @Snippet
 209     public static Object allocateArrayAtomic(Word hub, int length, Word prototypeMarkWord, @ConstantParameter int headerSize, @ConstantParameter int log2ElementSize,
 210                     @ConstantParameter boolean fillContents, @ConstantParameter boolean maybeUnroll, @ConstantParameter String typeContext) {
 211         if (!belowThan(length, MAX_ARRAY_FAST_PATH_ALLOCATION_LENGTH)) {
 212             // This handles both negative array sizes and very large array sizes
 213             DeoptimizeNode.deopt(DeoptimizationAction.None, DeoptimizationReason.RuntimeConstraint);
 214         }
 215         return allocateArrayAtomicImpl(hub, length, prototypeMarkWord, headerSize, log2ElementSize, fillContents, maybeUnroll, typeContext);
 216     }
 217 
 218     protected static Object addressToFormattedArray(Word addr, int allocationSize, int length, int headerSize, Word hub, Word prototypeMarkWord, boolean fillContents, boolean maybeUnroll,
 219                     @ConstantParameter String typeContext) {
 220         // we are not in a stub so we can set useSnippetCounters to true
 221         Object result = formatArray(hub, allocationSize, length, headerSize, addr, prototypeMarkWord, fillContents, maybeUnroll, true);
 222         profileAllocation("array", allocationSize, typeContext);
 223         return piArrayCast(verifyOop(result), length, StampFactory.forNodeIntrinsic());
 224     }
 225 
    /**
     * Allocates an array using atomic updates of the TLAB top. It computes the allocation size
     * from the length, header size and element size, then tries, in order: a bump allocation
     * from the current tlabInfo's TLAB, the slow path that attempts to obtain a new TLAB, and
     * (only if {@code HsailUseEdenAllocate} is set) a direct eden allocation. If none of these
     * yields an address it deoptimizes.
     *
     * @param hub hub (klass) word of the array type
     * @param length number of array elements
     * @param prototypeMarkWord mark word for the new array
     * @param headerSize size of the array header
     * @param log2ElementSize log2 of the element size
     * @param fillContents passed through to the array formatter
     * @param maybeUnroll passed through to the array formatter
     * @param typeContext type name used for allocation profiling
     * @return the newly allocated and formatted array
     */
    private static Object allocateArrayAtomicImpl(Word hub, int length, Word prototypeMarkWord, int headerSize, int log2ElementSize, boolean fillContents, boolean maybeUnroll, String typeContext) {
        int alignment = wordSize();
        int allocationSize = computeArrayAllocationSize(length, alignment, headerSize, log2ElementSize);
        // never set to true; see the comment at its use below
        boolean haveResult = false;
        if (useTLAB()) {
            // inlining this manually here because it resulted in better fastpath codegen
            Word tlabInfo = getTlabInfoPtr();
            if (probability(FAST_PATH_PROBABILITY, tlabInfo.notEqual(0))) {
                // atomically claim [top, top + allocationSize) and then check that it fits
                Word top = atomicGetAndAddTlabInfoTop(tlabInfo, allocationSize);
                Word end = readTlabInfoEnd(tlabInfo);
                Word newTop = top.add(allocationSize);
                if (probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) {
                    return addressToFormattedArray(top, allocationSize, length, headerSize, hub, prototypeMarkWord, fillContents, maybeUnroll, typeContext);
                } else {
                    // tlab overflowed: try to get a new tlab and allocate from that
                    Word addr = allocateFromTlabSlowPath(tlabInfo, allocationSize, top, end);
                    if (addr.notEqual(0)) {
                        return addressToFormattedArray(addr, allocationSize, length, headerSize, hub, prototypeMarkWord, fillContents, maybeUnroll, typeContext);
                    }
                }
            }
        }

        // we could not allocate from tlab, try allocating directly from eden
        if (hsailUseEdenAllocate) {
            // false for no logging
            Word addr = NewInstanceStub.edenAllocate(Word.unsigned(allocationSize), false);
            if (addr.notEqual(0)) {
                newarray_eden.inc();
                return addressToFormattedArray(addr, allocationSize, length, headerSize, hub, prototypeMarkWord, fillContents, maybeUnroll, typeContext);
            }
        }
        // The haveResult test mirrors the one in allocateInstanceAtomic, where it is there to
        // avoid earlier stores being dropped.
        if (!haveResult) {
            DeoptimizeNode.deopt(DeoptimizationAction.None, DeoptimizationReason.RuntimeConstraint);
        }
        // will never get here but this keeps the compiler happy
        return Word.zero().toObject();
    }
 263 
 264     public static class Templates extends AbstractTemplates {
 265 
 266         private final SnippetInfo allocateInstance = snippet(HSAILNewObjectSnippets.class, "allocateInstanceAtomic");
 267         private final SnippetInfo allocateArray = snippet(HSAILNewObjectSnippets.class, "allocateArrayAtomic");
 268 
 269         // private final SnippetInfo allocateArrayDynamic = snippet(NewObjectSnippets.class,
 270         // "allocateArrayDynamic");
 271         // private final SnippetInfo newmultiarray = snippet(NewObjectSnippets.class,
 272         // "newmultiarray");
 273 
 274         public Templates(HotSpotProviders providers, TargetDescription target) {
 275             super(providers, providers.getSnippetReflection(), target);
 276         }
 277 
 278         /**
 279          * Lowers a {@link NewInstanceNode}.
 280          */
 281         public void lower(NewInstanceNode newInstanceNode, LoweringTool tool) {
 282             StructuredGraph graph = newInstanceNode.graph();
 283             HotSpotResolvedObjectType type = (HotSpotResolvedObjectType) newInstanceNode.instanceClass();
 284             assert !type.isArray();
 285             ConstantNode hub = ConstantNode.forConstant(type.klass(), providers.getMetaAccess(), graph);
 286             int size = instanceSize(type);
 287 
 288             Arguments args = new Arguments(allocateInstance, graph.getGuardsStage(), tool.getLoweringStage());
 289             args.addConst("size", size);
 290             args.add("hub", hub);
 291             args.add("prototypeMarkWord", type.prototypeMarkWord());
 292             args.addConst("fillContents", newInstanceNode.fillContents());
 293             args.addConst("typeContext", type.toJavaName(false));
 294 
 295             SnippetTemplate template = template(args);
 296             Debug.log("Lowering allocateInstance in %s: node=%s, template=%s, arguments=%s", graph, newInstanceNode, template, args);
 297             template.instantiate(providers.getMetaAccess(), newInstanceNode, DEFAULT_REPLACER, args);
 298         }
 299 
 300         /**
 301          * Lowers a {@link NewArrayNode}.
 302          */
 303         public void lower(NewArrayNode newArrayNode, LoweringTool tool) {
 304             StructuredGraph graph = newArrayNode.graph();
 305             ResolvedJavaType elementType = newArrayNode.elementType();
 306             HotSpotResolvedObjectType arrayType = (HotSpotResolvedObjectType) elementType.getArrayClass();
 307             Kind elementKind = elementType.getKind();
 308             ConstantNode hub = ConstantNode.forConstant(arrayType.klass(), providers.getMetaAccess(), graph);
 309             final int headerSize = HotSpotGraalRuntime.getArrayBaseOffset(elementKind);
 310             // lowerer extends HotSpotLoweringProvider so we can just use that
 311             HotSpotLoweringProvider lowerer = (HotSpotLoweringProvider) providers.getLowerer();
 312             int log2ElementSize = CodeUtil.log2(lowerer.arrayScalingFactor(elementKind));
 313 
 314             Arguments args = new Arguments(allocateArray, graph.getGuardsStage(), tool.getLoweringStage());
 315             args.add("hub", hub);
 316             ValueNode length = newArrayNode.length();
 317             args.add("length", length.isAlive() ? length : graph.addOrUniqueWithInputs(length));
 318             args.add("prototypeMarkWord", arrayType.prototypeMarkWord());
 319             args.addConst("headerSize", headerSize);
 320             args.addConst("log2ElementSize", log2ElementSize);
 321             args.addConst("fillContents", newArrayNode.fillContents());
 322             args.addConst("maybeUnroll", length.isConstant());
 323             args.addConst("typeContext", arrayType.toJavaName(false));
 324 
 325             SnippetTemplate template = template(args);
 326             Debug.log("Lowering allocateArray in %s: node=%s, template=%s, arguments=%s", graph, newArrayNode, template, args);
 327             template.instantiate(providers.getMetaAccess(), newArrayNode, DEFAULT_REPLACER, args);
 328         }
 329 
 330         private static int instanceSize(HotSpotResolvedObjectType type) {
 331             int size = type.instanceSize();
 332             assert (size % wordSize()) == 0;
 333             assert size >= 0;
 334             return size;
 335         }
 336     }
 337 
    // Snippet counters for instance allocation. The group is only created when the
    // SnippetCounters option is set; otherwise the counter is constructed with a null group.
    private static final SnippetCounter.Group countersNew = SnippetCounters.getValue() ? new SnippetCounter.Group("NewInstance") : null;
    // incremented when an instance was allocated via the edenAllocate fallback
    private static final SnippetCounter new_eden = new SnippetCounter(countersNew, "eden", "used edenAllocate");

    // Snippet counters for array allocation, set up the same way as the instance counters.
    private static final SnippetCounter.Group countersNewArray = SnippetCounters.getValue() ? new SnippetCounter.Group("NewArray") : null;
    // private static final SnippetCounter newarray_loopInit = new SnippetCounter(countersNewArray,
    // "tlabLoopInit", "TLAB alloc with zeroing in a loop");
    // incremented when an array was allocated via the edenAllocate fallback
    private static final SnippetCounter newarray_eden = new SnippetCounter(countersNewArray, "eden", "used edenAllocate");
 345 }