1 /* 2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef GPU_HSAIL_VM_GPU_HSAIL_HPP 26 #define GPU_HSAIL_VM_GPU_HSAIL_HPP 27 28 #include "runtime/gpu.hpp" 29 #include "utilities/exceptions.hpp" 30 #include "graal/graalEnv.hpp" 31 #include "gpu_hsail_OopMapHelper.hpp" 32 #include "gpu_hsail_Frame.hpp" 33 #include "gpu_hsail_Tlab.hpp" 34 35 struct HSAILKernelStats { 36 int _dispatches; 37 int _deopts; 38 int _overflows; 39 bool _changeSeen; 40 41 public: 42 HSAILKernelStats() { 43 _dispatches = _deopts = _overflows = 0; 44 _changeSeen = false; 45 } 46 47 void incDeopts() { 48 _deopts++; 49 _changeSeen = true; 50 } 51 void incOverflows() { 52 _overflows++; 53 _changeSeen = true; 54 } 55 56 void finishDispatch() { 57 _dispatches++; 58 if (_changeSeen) { 59 // print(); 60 _changeSeen = false; 61 } 62 } 63 64 void print() { 65 tty->print_cr("Disp=%d, Deopts=%d, Ovflows=%d", _dispatches, _deopts, _overflows); 66 } 67 }; 68 69 class Hsail : public Gpu { 70 71 public: 72 class HSAILKernelDeoptimization VALUE_OBJ_CLASS_SPEC { 73 friend class VMStructs; 74 private: 75 // TODO: separate workitemid and actionAndReason out 76 // since they are there only once even if there are multiple frames 77 // for now, though we only ever have one hsail frame 78 jint _workitemid; 79 jint _actionAndReason; 80 // the first (innermost) "hsail frame" starts after the above fields 81 82 public: 83 inline jint workitem() { return _workitemid; } 84 inline jint reason() { return _actionAndReason; } 85 inline jint pc_offset() { return first_frame()->pc_offset(); } 86 inline HSAILFrame* first_frame() { 87 // starts after the "header" fields 88 return (HSAILFrame*) (((jbyte*) this) + sizeof(*this)); 89 } 90 }; 91 92 // 8 compute units * 40 waves per cu * wavesize 64 93 // TODO: query the device to get this number 94 #define MAX_DEOPT_SLOTS (8 * 40 * 64) 95 96 97 class HSAILDeoptimizationInfo : public CHeapObj<mtInternal> { 98 friend class VMStructs; 99 private: 100 jint* _notice_safepoints; 101 jint _deopt_occurred; 102 jint _deopt_next_index; 103 jint _num_slots; 104 jint _deopt_span; 105 jint _deopt_work_index; // how far we are in processing the deopts 106 HSAILTlabInfo** _cur_tlab_info; // copy of what was in the HSAILAllocationInfo, to avoid an extra indirection 107 HSAILAllocationInfo* _alloc_info; 108 char _ignore; 109 jobject _oop_map_array; 110 // keep a pointer last so save area following it is word aligned 111 jboolean* _never_ran_array; 112 113 public: 114 // static HSAILKernelStats kernelStats; 115 HSAILKernelDeoptimization _deopt_save_states[1]; // number and size of these can vary per kernel 116 117 static inline size_t hdr_size() { 118 return sizeof(HSAILDeoptimizationInfo); 119 } 120 121 inline jbyte* save_area_start() { 122 return (jbyte*) (this) + hdr_size(); 123 } 124 125 inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, HSAILAllocationInfo* allocInfo, jobject oop_map_array) { 126 _notice_safepoints = &Hsail::_notice_safepoints; 127 _deopt_occurred = 0; 128 _deopt_next_index = 0; 129 _deopt_work_index = 0; 130 _num_slots = numSlots; 131 _never_ran_array = NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal); 132 memset(_never_ran_array, 0, dimX * sizeof(jboolean)); 133 _alloc_info = allocInfo; 134 _oop_map_array = oop_map_array; 135 _deopt_span = sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea; 136 if (TraceGPUInteraction) { 137 tty->print_cr("HSAILDeoptimizationInfo allocated, %d slots of size %d, total size = 0x%lx bytes", _num_slots, _deopt_span, (_num_slots * _deopt_span + sizeof(HSAILDeoptimizationInfo))); 138 } 139 } 140 141 inline ~HSAILDeoptimizationInfo() { 142 FREE_C_HEAP_ARRAY(jboolean, _never_ran_array, mtInternal); 143 } 144 145 inline jint deopt_occurred() { 146 return _deopt_occurred; 147 } 148 inline jint num_deopts() { return _deopt_next_index; } 149 inline jboolean* never_ran_array() { return _never_ran_array; } 150 inline jint num_slots() {return _num_slots;} 151 inline void set_deopt_work_index(int val) { _deopt_work_index = val; } 152 inline jint deopt_work_index() { return _deopt_work_index; } 153 154 inline HSAILKernelDeoptimization* get_deopt_save_state(int slot) { 155 // use _deopt_span to index into _deopt_states 156 return (HSAILKernelDeoptimization*) (save_area_start() + _deopt_span * slot); 157 } 158 159 void set_cur_tlabInfos(HSAILTlabInfo** ptlabInfos) { 160 _cur_tlab_info = ptlabInfos; 161 } 162 163 void oops_do(OopClosure* f); 164 165 void* operator new (size_t hdrSize, int numSlots, int bytesPerSaveArea) { 166 assert(hdrSize <= hdr_size(), ""); 167 size_t totalSizeBytes = hdr_size() + numSlots * (sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea); 168 return NEW_C_HEAP_ARRAY(char, totalSizeBytes, mtInternal); 169 } 170 171 void operator delete (void* ptr) { 172 FREE_C_HEAP_ARRAY(char, ptr, mtInternal); 173 } 174 }; 175 176 private: 177 178 static JNINativeMethod HSAIL_methods[]; 179 180 // static native boolean initialize(); 181 JNIEXPORT static jboolean initialize(JNIEnv* env, jclass); 182 183 // static native long generateKernel(byte[] targetCode, String name); 184 JNIEXPORT static jlong generate_kernel(JNIEnv* env, jclass, jbyteArray code_handle, jstring name_handle); 185 186 // static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args); 187 JNIEXPORT static jboolean execute_kernel_void_1d(JNIEnv* env, jclass, jobject hotspotInstalledCode, jint dimX, jobject args, 188 jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array); 189 190 static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm, 191 jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS); 192 193 static GraalEnv::CodeInstallResult install_code(Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle triggered_deoptimizations); 194 195 public: 196 197 // Registers the implementations for the native methods in HSAILHotSpotBackend 198 static bool register_natives(JNIEnv* env); 199 200 virtual const char* name() { return "HSAIL"; } 201 202 virtual void notice_safepoints(); 203 virtual void ignore_safepoints(); 204 205 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) 206 typedef unsigned long long CUdeviceptr; 207 #else 208 typedef unsigned int CUdeviceptr; 209 #endif 210 211 private: 212 213 /* 214 * Kernel launch options from okra.h 215 */ 216 typedef struct graal_okra_range_s { 217 uint32_t dimension; //max value is 3 218 uint32_t global_size[3]; 219 uint32_t group_size[3]; 220 uint32_t reserved; //For future use 221 } graal_okra_range_t; 222 223 typedef jint (*okra_get_context_func_t)(void**); 224 typedef jint (*okra_create_kernel_func_t)(void*, unsigned char*, const char*, void**); 225 typedef jint (*okra_push_pointer_func_t)(void*, void*); 226 typedef jint (*okra_push_boolean_func_t)(void*, jboolean); 227 typedef jint (*okra_push_byte_func_t)(void*, jbyte); 228 typedef jint (*okra_push_double_func_t)(void*, jdouble); 229 typedef jint (*okra_push_float_func_t)(void*, jfloat); 230 typedef jint (*okra_push_int_func_t)(void*, jint); 231 typedef jint (*okra_push_long_func_t)(void*, jlong); 232 typedef jint (*okra_execute_kernel_func_t)(void*, void*, graal_okra_range_t*); 233 typedef jint (*okra_clear_args_func_t)(void*); 234 typedef jint (*okra_dispose_kernel_func_t)(void*); 235 typedef jint (*okra_dispose_context_func_t)(void*); 236 237 public: 238 static okra_get_context_func_t _okra_get_context; 239 static okra_create_kernel_func_t _okra_create_kernel; 240 static okra_push_pointer_func_t _okra_push_pointer; 241 static okra_push_boolean_func_t _okra_push_boolean; 242 static okra_push_byte_func_t _okra_push_byte; 243 static okra_push_double_func_t _okra_push_double; 244 static okra_push_float_func_t _okra_push_float; 245 static okra_push_int_func_t _okra_push_int; 246 static okra_push_long_func_t _okra_push_long; 247 static okra_execute_kernel_func_t _okra_execute_kernel; 248 static okra_clear_args_func_t _okra_clear_args; 249 static okra_dispose_kernel_func_t _okra_dispose_kernel; 250 static okra_dispose_context_func_t _okra_dispose_context; 251 252 protected: 253 static void* _device_context; 254 255 // true if safepoints are activated 256 static jint _notice_safepoints; 257 }; 258 #endif // GPU_HSAIL_VM_GPU_HSAIL_HPP