1 /* 2 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef GPU_HSAIL_VM_GPU_HSAIL_TLAB_HPP 26 #define GPU_HSAIL_VM_GPU_HSAIL_TLAB_HPP 27 28 #include "graal/graalEnv.hpp" 29 #include "code/debugInfo.hpp" 30 #include "code/location.hpp" 31 #include "gpu_hsail.hpp" 32 33 class HSAILAllocationInfo; 34 35 class HSAILTlabInfo VALUE_OBJ_CLASS_SPEC { 36 friend class VMStructs; 37 public: 38 // uses only the necessary fields from a full TLAB 39 HeapWord* _start; 40 HeapWord* _top; 41 HeapWord* _end; 42 HeapWord* _last_good_top; 43 HeapWord* _original_top; 44 ThreadLocalAllocBuffer* _tlab; // tlab associated with this tlabInfo 45 HSAILAllocationInfo* _alloc_info; // same as what is in HSAILDeoptimizationInfo 46 47 // Accessors 48 HeapWord* start() { return _start; } 49 HeapWord* top() { return _top; } 50 HeapWord* end() { return _end; } 51 HeapWord* last_good_top() { return _last_good_top; } 52 HeapWord* original_top() { return _original_top; } 53 ThreadLocalAllocBuffer* tlab() { return _tlab; } 54 void initialize(HeapWord* start, HeapWord* top, HeapWord* end, ThreadLocalAllocBuffer* tlab, HSAILAllocationInfo* allocInfo) { 55 _start = start; 56 _top = _original_top = top; 57 _end = end; 58 _tlab = tlab; 59 _alloc_info = allocInfo; 60 } 61 }; 62 63 64 class HSAILAllocationInfo : public CHeapObj<mtInternal> { 65 friend class VMStructs; 66 private: 67 jint _num_tlabs; 68 size_t _tlab_align_reserve_bytes; // filled in from ThreadLocalAllocBuffer::alignment_reserve_in_bytes() 69 HSAILTlabInfo** _cur_tlab_infos; // array of current tlab info pointers, one per num_tlabs 70 HSAILTlabInfo* _tlab_infos_pool_start; // pool for new tlab_infos 71 HSAILTlabInfo* _tlab_infos_pool_next; // where next will be allocated from 72 HSAILTlabInfo* _tlab_infos_pool_end; // where next will be allocated from 73 74 public: 75 HSAILAllocationInfo(jint num_tlabs, int dimX, int allocBytesPerWorkitem) { 76 _num_tlabs = num_tlabs; 77 // if this thread doesn't have gpu_hsail_tlabs allocated yet, do so now 78 JavaThread* thread = JavaThread::current(); 79 if (thread->get_gpu_hsail_tlabs_count() == 0) { 80 thread->initialize_gpu_hsail_tlabs(num_tlabs); 81 if (TraceGPUInteraction) { 82 for (int i = 0; i < num_tlabs; i++) { 83 ThreadLocalAllocBuffer* tlab = thread->get_gpu_hsail_tlab_at(i); 84 tty->print("initialized gpu_hsail_tlab %d at %p -> ", i, tlab); 85 printTlabInfoFromThread(tlab); 86 } 87 } 88 } 89 90 // Compute max_tlab_infos based on amount of free heap space 91 size_t max_tlab_infos; 92 { 93 ThreadLocalAllocBuffer* tlab = &thread->tlab(); 94 size_t new_tlab_size = tlab->compute_size(0); 95 size_t heap_bytes_free = Universe::heap()->unsafe_max_tlab_alloc(thread); 96 if (new_tlab_size != 0) { 97 max_tlab_infos = MIN2(heap_bytes_free / new_tlab_size, (size_t)(64 * _num_tlabs)); 98 } else { 99 max_tlab_infos = 8 * _num_tlabs; // an arbitrary multiple 100 } 101 if (TraceGPUInteraction) { 102 tty->print_cr("heapFree = %ld, newTlabSize=%ld, tlabInfos allocated = %ld", heap_bytes_free, new_tlab_size, max_tlab_infos); 103 } 104 } 105 106 _cur_tlab_infos = NEW_C_HEAP_ARRAY(HSAILTlabInfo*, _num_tlabs, mtInternal); 107 _tlab_infos_pool_start = NEW_C_HEAP_ARRAY(HSAILTlabInfo, max_tlab_infos, mtInternal); 108 _tlab_infos_pool_next = &_tlab_infos_pool_start[_num_tlabs]; 109 _tlab_infos_pool_end = &_tlab_infos_pool_start[max_tlab_infos]; 110 _tlab_align_reserve_bytes = ThreadLocalAllocBuffer::alignment_reserve_in_bytes(); 111 112 // we will fill the first N tlabInfos from the gpu_hsail_tlabs 113 for (int i = 0; i < _num_tlabs; i++) { 114 ThreadLocalAllocBuffer* tlab = thread->get_gpu_hsail_tlab_at(i); 115 if (TraceGPUInteraction) { 116 tty->print("gpu_hsail_tlab %d at %p -> ", i, tlab); 117 printTlabInfoFromThread(tlab); 118 } 119 120 // Here we try to get a new tlab if current one is null. Note: 121 // eventually we may want to test if the size is too small based 122 // on some heuristic where we see how much this kernel tends to 123 // allocate, but for now we can just let it overflow and let the 124 // GPU allocate new tlabs. Actually, if we can't prime a tlab 125 // here, it might make sense to do a gc now rather than to start 126 // the kernel and have it deoptimize. How to do that? 127 if (tlab->end() == NULL) { 128 bool success = getNewGpuHsailTlab(tlab); 129 if (TraceGPUInteraction) { 130 if (success) { 131 tty->print("gpu_hsail_tlab %d, refilled tlab, -> ", i); 132 printTlabInfoFromThread(tlab); 133 } else { 134 tty->print("gpu_hsail_tlab %d, could not refill tlab, left as ", i); 135 printTlabInfoFromThread(tlab); 136 } 137 } 138 } 139 140 // extract the necessary tlab fields into a TlabInfo record 141 HSAILTlabInfo* pTlabInfo = &_tlab_infos_pool_start[i]; 142 _cur_tlab_infos[i] = pTlabInfo; 143 pTlabInfo->initialize(tlab->start(), tlab->top(), tlab->end(), tlab, this); 144 } 145 } 146 147 ~HSAILAllocationInfo() { 148 FREE_C_HEAP_ARRAY(HSAILTlabInfo*, _cur_tlab_infos, mtInternal); 149 FREE_C_HEAP_ARRAY(HSAILTlabInfo, _tlab_infos_pool_start, mtInternal); 150 } 151 152 void postKernelCleanup() { 153 // go thru all the tlabInfos, fix up any tlab tops that overflowed 154 // complete the tlabs if they overflowed 155 // update the gpu_hsail_tlabs when appropriate 156 bool anyOverflows = false; 157 size_t bytesAllocated = 0; 158 // if there was an overflow in allocating tlabInfos, correct it here 159 if (_tlab_infos_pool_next > _tlab_infos_pool_end) { 160 if (TraceGPUInteraction) { 161 int overflowAmount = _tlab_infos_pool_next - _tlab_infos_pool_end; 162 tty->print_cr("tlabInfo allocation overflowed by %d units", overflowAmount); 163 } 164 _tlab_infos_pool_next = _tlab_infos_pool_end; 165 } 166 for (HSAILTlabInfo* tlabInfo = _tlab_infos_pool_start; tlabInfo < _tlab_infos_pool_next; tlabInfo++) { 167 if (TraceGPUInteraction) { 168 tty->print_cr("postprocess tlabInfo %p, start=%p, top=%p, end=%p, last_good_top=%p", tlabInfo, 169 tlabInfo->start(), tlabInfo->top(), tlabInfo->end(), tlabInfo->last_good_top()); 170 } 171 ThreadLocalAllocBuffer* tlab = tlabInfo->tlab(); 172 bool overflowed = false; 173 // if a tlabInfo has NULL fields, i.e. we could not prime it on entry, 174 // or we could not get a tlab from the gpu, so ignore tlabInfo here 175 if (tlabInfo->start() != NULL) { 176 if (tlabInfo->top() > tlabInfo->end()) { 177 anyOverflows = true; 178 overflowed = true; 179 if (TraceGPUInteraction) { 180 long overflowAmount = (long) tlabInfo->top() - (long) tlabInfo->last_good_top(); 181 tty->print_cr("tlabInfo %p (tlab = %p) overflowed by %ld bytes, setting last good top to %p", tlabInfo, tlab, overflowAmount, tlabInfo->last_good_top()); 182 } 183 tlabInfo->_top = tlabInfo->last_good_top(); 184 } 185 186 // fill the gpu_hsail_tlab with the tlabInfo information 187 // we do this even if it will get overwritten by a later tlabinfo 188 // because it helps with tlab statistics for that tlab 189 tlab->fill(tlabInfo->start(), tlabInfo->top(), (tlabInfo->end() - tlabInfo->start()) + tlab->alignment_reserve()); 190 191 // if there was an overflow, make it parsable with retire = true 192 if (overflowed) { 193 tlab->make_parsable(true); 194 } 195 196 size_t delta = (long)(tlabInfo->top()) - (long)(tlabInfo->original_top()); 197 if (TraceGPUInteraction) { 198 tty->print_cr("%ld bytes were allocated by tlabInfo %p (start %p, top %p, end %p", delta, tlabInfo, 199 tlabInfo->start(), tlabInfo->top(), tlabInfo->end()); 200 } 201 bytesAllocated += delta; 202 } 203 } 204 if (TraceGPUInteraction) { 205 tty->print_cr("%ld total bytes were allocated in this kernel", bytesAllocated); 206 } 207 if (anyOverflows) { 208 // Hsail::kernelStats.incOverflows(); 209 } 210 } 211 212 HSAILTlabInfo** getCurTlabInfos() { 213 return _cur_tlab_infos; 214 } 215 216 private: 217 // fill and retire old tlab and get a new one 218 // if we can't get one, no problem someone will eventually do a gc 219 bool getNewGpuHsailTlab(ThreadLocalAllocBuffer* tlab) { 220 221 tlab->clear_before_allocation(); // fill and retire old tlab (will also check for null) 222 223 // get a size for a new tlab that is based on the desired_size 224 size_t new_tlab_size = tlab->compute_size(0); 225 if (new_tlab_size == 0) return false; 226 227 HeapWord* tlab_start = Universe::heap()->allocate_new_tlab(new_tlab_size); 228 if (tlab_start == NULL) return false; 229 230 // ..and clear it if required 231 if (ZeroTLAB) { 232 Copy::zero_to_words(tlab_start, new_tlab_size); 233 } 234 // and init the tlab pointers 235 tlab->fill(tlab_start, tlab_start, new_tlab_size); 236 return true; 237 } 238 239 void printTlabInfoFromThread (ThreadLocalAllocBuffer* tlab) { 240 HeapWord* start = tlab->start(); 241 HeapWord* top = tlab->top(); 242 HeapWord* end = tlab->end(); 243 // sizes are in bytes 244 size_t tlabFree = tlab->free() * HeapWordSize; 245 size_t tlabUsed = tlab->used() * HeapWordSize; 246 size_t tlabSize = tlabFree + tlabUsed; 247 double freePct = 100.0 * (double) tlabFree/(double) tlabSize; 248 tty->print_cr("(%p, %p, %p), siz=%ld, free=%ld (%f%%)", start, top, end, tlabSize, tlabFree, freePct); 249 } 250 251 }; 252 253 #endif // GPU_HSAIL_VM_GPU_HSAIL_TLAB_HPP