New src/gpu/hsail/vm/gpu_hsail

   1 /*
   2  * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef GPU_HSAIL_VM_GPU_HSAIL_TLAB_HPP
  26 #define GPU_HSAIL_VM_GPU_HSAIL_TLAB_HPP
  27 
  28 #include "graal/graalEnv.hpp"
  29 #include "code/debugInfo.hpp"
  30 #include "code/location.hpp"
  31 #include "gpu_hsail.hpp"
  32 
  33 class HSAILAllocationInfo;
  34 
  35 class HSAILTlabInfo VALUE_OBJ_CLASS_SPEC {
  36   friend class VMStructs;
  37 public:
  38   // uses only the necessary fields from a full TLAB
  39   HeapWord* _start;
  40   HeapWord* _top;
  41   HeapWord* _end;
  42   HeapWord* _last_good_top;
  43   HeapWord* _original_top;
  44   ThreadLocalAllocBuffer* _tlab;      // tlab associated with this tlabInfo
  45   HSAILAllocationInfo* _alloc_info;   // same as what is in HSAILDeoptimizationInfo
  46 
  47   // Accessors
  48   HeapWord* start() { return _start; }
  49   HeapWord* top() { return _top; }
  50   HeapWord* end() { return _end; }
  51   HeapWord* last_good_top() { return _last_good_top; }
  52   HeapWord* original_top() { return _original_top; }
  53   ThreadLocalAllocBuffer* tlab() { return _tlab; }
  54   void initialize(HeapWord* start, HeapWord* top, HeapWord* end, ThreadLocalAllocBuffer* tlab, HSAILAllocationInfo* allocInfo) {
  55     _start = start;
  56     _top = _original_top = top;
  57     _end = end;
  58     _tlab = tlab;
  59     _alloc_info = allocInfo;
  60   }
  61 };
  62 
  63 
  64 class HSAILAllocationInfo : public CHeapObj<mtInternal> {
  65   friend class VMStructs;
  66 private:
  67   jint   _num_tlabs;
  68   size_t _tlab_align_reserve_bytes;         // filled in from ThreadLocalAllocBuffer::alignment_reserve_in_bytes()
  69   HSAILTlabInfo** _cur_tlab_infos;          // array of current tlab info pointers, one per num_tlabs
  70   HSAILTlabInfo* _tlab_infos_pool_start;    // pool for new tlab_infos
  71   HSAILTlabInfo* _tlab_infos_pool_next;     // where next will be allocated from
  72   HSAILTlabInfo* _tlab_infos_pool_end;      // where next will be allocated from
  73 
  74 public:
  75   HSAILAllocationInfo(jint num_tlabs, int dimX, int allocBytesPerWorkitem) {
  76     _num_tlabs = num_tlabs;
  77     // if this thread doesn't have gpu_hsail_tlabs allocated yet, do so now
  78     JavaThread* thread = JavaThread::current();
  79     if (thread->get_gpu_hsail_tlabs_count() == 0) {
  80       thread->initialize_gpu_hsail_tlabs(num_tlabs);
  81       if (TraceGPUInteraction) {
  82         for (int i = 0; i < num_tlabs; i++) {
  83           ThreadLocalAllocBuffer* tlab = thread->get_gpu_hsail_tlab_at(i);
  84           tty->print("initialized gpu_hsail_tlab %d at %p -> ", i, tlab);
  85           printTlabInfoFromThread(tlab);
  86         }
  87       }
  88     }
  89 
  90     // Compute max_tlab_infos based on amount of free heap space
  91     size_t max_tlab_infos;
  92     {
  93       ThreadLocalAllocBuffer* tlab = &thread->tlab();
  94       size_t new_tlab_size = tlab->compute_size(0);
  95       size_t heap_bytes_free = Universe::heap()->unsafe_max_tlab_alloc(thread);
  96       if (new_tlab_size != 0) {
  97         max_tlab_infos = MIN2(heap_bytes_free / new_tlab_size, (size_t)(64 * _num_tlabs));
  98       } else {
  99         max_tlab_infos = 8 * _num_tlabs;   // an arbitrary multiple
 100       }
 101       if (TraceGPUInteraction) {
 102         tty->print_cr("heapFree = %ld, newTlabSize=%ld, tlabInfos allocated = %ld", heap_bytes_free, new_tlab_size, max_tlab_infos);
 103       }
 104     }
 105 
 106     _cur_tlab_infos = NEW_C_HEAP_ARRAY(HSAILTlabInfo*, _num_tlabs, mtInternal);
 107     _tlab_infos_pool_start = NEW_C_HEAP_ARRAY(HSAILTlabInfo, max_tlab_infos, mtInternal);
 108     _tlab_infos_pool_next = &_tlab_infos_pool_start[_num_tlabs];
 109     _tlab_infos_pool_end = &_tlab_infos_pool_start[max_tlab_infos];
 110     _tlab_align_reserve_bytes = ThreadLocalAllocBuffer::alignment_reserve_in_bytes();
 111       
 112     // we will fill the first N tlabInfos from the gpu_hsail_tlabs
 113     for (int i = 0; i < _num_tlabs; i++) {
 114       ThreadLocalAllocBuffer* tlab = thread->get_gpu_hsail_tlab_at(i);
 115       if (TraceGPUInteraction) {
 116         tty->print("gpu_hsail_tlab %d at %p -> ", i, tlab);
 117         printTlabInfoFromThread(tlab);
 118       }
 119       
 120       // Here we try to get a new tlab if current one is null. Note:
 121       // eventually we may want to test if the size is too small based
 122       // on some heuristic where we see how much this kernel tends to
 123       // allocate, but for now we can just let it overflow and let the
 124       // GPU allocate new tlabs. Actually, if we can't prime a tlab
 125       // here, it might make sense to do a gc now rather than to start
 126       // the kernel and have it deoptimize.  How to do that?
 127       if (tlab->end() == NULL) {
 128         bool success = getNewGpuHsailTlab(tlab);
 129         if (TraceGPUInteraction) {
 130           if (success) {
 131             tty->print("gpu_hsail_tlab %d, refilled tlab, -> ", i);
 132             printTlabInfoFromThread(tlab);
 133           } else {
 134             tty->print("gpu_hsail_tlab %d, could not refill tlab, left as ", i);
 135             printTlabInfoFromThread(tlab);
 136           }
 137         }
 138       }
 139 
 140       // extract the necessary tlab fields into a TlabInfo record
 141       HSAILTlabInfo* pTlabInfo = &_tlab_infos_pool_start[i];
 142       _cur_tlab_infos[i] = pTlabInfo;
 143       pTlabInfo->initialize(tlab->start(), tlab->top(), tlab->end(), tlab, this);
 144     }
 145   }
 146 
 147   ~HSAILAllocationInfo() {
 148     FREE_C_HEAP_ARRAY(HSAILTlabInfo*, _cur_tlab_infos, mtInternal);
 149     FREE_C_HEAP_ARRAY(HSAILTlabInfo, _tlab_infos_pool_start, mtInternal);
 150   }
 151 
 152   void postKernelCleanup() {
 153     // go thru all the tlabInfos, fix up any tlab tops that overflowed
 154     // complete the tlabs if they overflowed
 155     // update the gpu_hsail_tlabs when appropriate
 156     bool anyOverflows = false;
 157     size_t bytesAllocated = 0;
 158     // if there was an overflow in allocating tlabInfos, correct it here
 159     if (_tlab_infos_pool_next > _tlab_infos_pool_end) {
 160       if (TraceGPUInteraction) {
 161         int overflowAmount = _tlab_infos_pool_next - _tlab_infos_pool_end;
 162         tty->print_cr("tlabInfo allocation overflowed by %d units", overflowAmount);
 163       }
 164       _tlab_infos_pool_next = _tlab_infos_pool_end;
 165     }
 166     for (HSAILTlabInfo* tlabInfo = _tlab_infos_pool_start; tlabInfo < _tlab_infos_pool_next; tlabInfo++) {
 167       if (TraceGPUInteraction) {
 168         tty->print_cr("postprocess tlabInfo %p, start=%p, top=%p, end=%p, last_good_top=%p", tlabInfo, 
 169                       tlabInfo->start(), tlabInfo->top(), tlabInfo->end(), tlabInfo->last_good_top());
 170       }
 171       ThreadLocalAllocBuffer* tlab = tlabInfo->tlab();
 172       bool overflowed = false;
 173       // if a tlabInfo has NULL fields, i.e. we could not prime it on entry,
 174       // or we could not get a tlab from the gpu, so ignore tlabInfo here
 175       if (tlabInfo->start() != NULL) {
 176         if (tlabInfo->top() > tlabInfo->end()) {
 177           anyOverflows = true;
 178           overflowed = true;
 179           if (TraceGPUInteraction) {
 180             long overflowAmount = (long) tlabInfo->top() - (long) tlabInfo->last_good_top(); 
 181             tty->print_cr("tlabInfo %p (tlab = %p) overflowed by %ld bytes, setting last good top to %p", tlabInfo, tlab, overflowAmount, tlabInfo->last_good_top());
 182           }
 183           tlabInfo->_top = tlabInfo->last_good_top();
 184         }
 185 
 186         // fill the gpu_hsail_tlab with the tlabInfo information
 187         // we do this even if it will get overwritten by a later tlabinfo
 188         // because it helps with tlab statistics for that tlab
 189         tlab->fill(tlabInfo->start(), tlabInfo->top(), (tlabInfo->end() - tlabInfo->start()) + tlab->alignment_reserve());
 190 
 191         // if there was an overflow, make it parsable with retire = true
 192         if (overflowed) {
 193           tlab->make_parsable(true);
 194         }
 195         
 196         size_t delta = (long)(tlabInfo->top()) - (long)(tlabInfo->original_top());
 197         if (TraceGPUInteraction) {
 198           tty->print_cr("%ld bytes were allocated by tlabInfo %p (start %p, top %p, end %p", delta, tlabInfo,
 199                         tlabInfo->start(), tlabInfo->top(), tlabInfo->end());
 200         }
 201         bytesAllocated += delta;
 202       }
 203     }
 204     if (TraceGPUInteraction) {
 205       tty->print_cr("%ld total bytes were allocated in this kernel", bytesAllocated);
 206     }
 207     if (anyOverflows) {
 208       // Hsail::kernelStats.incOverflows();
 209     }
 210   }
 211 
 212   HSAILTlabInfo** getCurTlabInfos() {
 213     return _cur_tlab_infos;
 214   }
 215 
 216 private:
 217   // fill and retire old tlab and get a new one
 218   // if we can't get one, no problem someone will eventually do a gc
 219   bool getNewGpuHsailTlab(ThreadLocalAllocBuffer* tlab) {
 220 
 221     tlab->clear_before_allocation();    // fill and retire old tlab (will also check for null)
 222     
 223     // get a size for a new tlab that is based on the desired_size
 224     size_t new_tlab_size = tlab->compute_size(0);
 225     if (new_tlab_size == 0) return false;
 226     
 227     HeapWord* tlab_start = Universe::heap()->allocate_new_tlab(new_tlab_size);
 228     if (tlab_start == NULL) return false;
 229     
 230     // ..and clear it if required
 231     if (ZeroTLAB) {
 232       Copy::zero_to_words(tlab_start, new_tlab_size);
 233     }
 234     // and init the tlab pointers
 235     tlab->fill(tlab_start, tlab_start, new_tlab_size);
 236     return true;
 237   }
 238   
 239   void printTlabInfoFromThread (ThreadLocalAllocBuffer* tlab) {
 240     HeapWord* start = tlab->start();
 241     HeapWord* top = tlab->top();
 242     HeapWord* end = tlab->end();
 243     // sizes are in bytes
 244     size_t tlabFree = tlab->free() * HeapWordSize;
 245     size_t tlabUsed = tlab->used() * HeapWordSize;
 246     size_t tlabSize = tlabFree + tlabUsed;
 247     double freePct = 100.0 * (double) tlabFree/(double) tlabSize;
 248     tty->print_cr("(%p, %p, %p), siz=%ld, free=%ld (%f%%)", start, top, end, tlabSize, tlabFree, freePct);
 249   }
 250   
 251 };
 252   
 253 #endif // GPU_HSAIL_VM_GPU_HSAIL_TLAB_HPP