1 /*
   2  * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef GPU_HSAIL_VM_GPU_HSAIL_HPP
  26 #define GPU_HSAIL_VM_GPU_HSAIL_HPP
  27 
  28 #include "runtime/gpu.hpp"
  29 #include "utilities/exceptions.hpp"
  30 #include "graal/graalEnv.hpp"
  31 #include "gpu_hsail_OopMapHelper.hpp"
  32 #include "gpu_hsail_Frame.hpp"
  33 #include "gpu_hsail_Tlab.hpp"
  34 
  35 struct HSAILKernelStats {
  36   int _dispatches;
  37   int _deopts;
  38   int _overflows;
  39   bool _changeSeen;
  40   
  41 public:
  42   HSAILKernelStats() {
  43     _dispatches = _deopts = _overflows = 0;
  44     _changeSeen = false;
  45   }
  46   
  47   void incDeopts() {
  48     _deopts++;
  49     _changeSeen = true;
  50   }
  51   void incOverflows() {
  52     _overflows++;
  53     _changeSeen = true;
  54   }
  55   
  56   void finishDispatch() {
  57     _dispatches++;
  58     if (_changeSeen) {
  59       // print();
  60       _changeSeen = false;
  61     }
  62   }
  63   
  64   void print() {
  65     tty->print_cr("Disp=%d, Deopts=%d, Ovflows=%d", _dispatches, _deopts, _overflows);
  66   }
  67 };
  68 
  69 class Hsail : public Gpu {
  70 
  71   public:
  72   class HSAILKernelDeoptimization VALUE_OBJ_CLASS_SPEC {
  73     friend class VMStructs;
  74    private:
  75     // TODO: separate workitemid and actionAndReason out
  76     // since they are there only once even if there are multiple frames
  77     // for now, though we only ever have one hsail frame
  78     jint  _workitemid;
  79     jint  _actionAndReason;
  80     // the first (innermost) "hsail frame" starts after the above fields
  81 
  82    public:
  83     inline jint workitem() { return _workitemid; }
  84     inline jint reason() { return _actionAndReason; }
  85     inline jint pc_offset() { return first_frame()->pc_offset(); }
  86     inline HSAILFrame* first_frame() {
  87       // starts after the "header" fields
  88       return (HSAILFrame*) (((jbyte*) this) + sizeof(*this));
  89     }
  90   };
  91 
  92 // 8 compute units * 40 waves per cu * wavesize 64
  93 // TODO: query the device to get this number
  94 #define MAX_DEOPT_SLOTS    (8 * 40 * 64)
  95 
  96 
  97   class HSAILDeoptimizationInfo : public CHeapObj<mtInternal> {
  98     friend class VMStructs;
  99    private:
 100     jint* _notice_safepoints;
 101     jint _deopt_occurred;
 102     jint _deopt_next_index;
 103     jint _num_slots;
 104     jint _deopt_span;
 105     jint _deopt_work_index;           // how far we are in processing the deopts
 106     HSAILTlabInfo** _cur_tlab_info;   // copy of what was in the HSAILAllocationInfo, to avoid an extra indirection
 107     HSAILAllocationInfo* _alloc_info;
 108     char _ignore;
 109     jobject _oop_map_array;
 110     // keep a pointer last so save area following it is word aligned
 111     jboolean* _never_ran_array; 
 112 
 113    public:
 114     // static HSAILKernelStats kernelStats;
 115     HSAILKernelDeoptimization _deopt_save_states[1];  // number and size of these can vary per kernel
 116 
 117     static inline size_t hdr_size() {
 118       return sizeof(HSAILDeoptimizationInfo);
 119     }
 120 
 121     inline jbyte* save_area_start() {
 122       return (jbyte*) (this) + hdr_size();
 123     }
 124 
 125     inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, HSAILAllocationInfo* allocInfo, jobject oop_map_array) {
 126       _notice_safepoints = &Hsail::_notice_safepoints;
 127       _deopt_occurred = 0;
 128       _deopt_next_index = 0;
 129       _deopt_work_index = 0;
 130       _num_slots = numSlots;
 131       _never_ran_array = NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal);
 132       memset(_never_ran_array, 0, dimX * sizeof(jboolean));
 133       _alloc_info = allocInfo;
 134       _oop_map_array = oop_map_array;
 135       _deopt_span = sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea;
 136       if (TraceGPUInteraction) {
 137         tty->print_cr("HSAILDeoptimizationInfo allocated, %d slots of size %d, total size = 0x%lx bytes", _num_slots, _deopt_span, (_num_slots * _deopt_span + sizeof(HSAILDeoptimizationInfo)));
 138       }
 139     }
 140 
 141     inline ~HSAILDeoptimizationInfo() {
 142       FREE_C_HEAP_ARRAY(jboolean, _never_ran_array, mtInternal);
 143     }
 144 
 145     inline jint deopt_occurred() {
 146       return _deopt_occurred;
 147     }
 148     inline jint num_deopts() { return _deopt_next_index; }
 149     inline jboolean* never_ran_array() { return _never_ran_array; }
 150     inline jint num_slots() {return _num_slots;}
 151     inline void set_deopt_work_index(int val) { _deopt_work_index = val; }
 152     inline jint deopt_work_index() { return _deopt_work_index; }
 153 
 154     inline HSAILKernelDeoptimization* get_deopt_save_state(int slot) {
 155       // use _deopt_span to index into _deopt_states
 156       return (HSAILKernelDeoptimization*) (save_area_start() + _deopt_span * slot);
 157     }
 158 
 159     void set_cur_tlabInfos(HSAILTlabInfo** ptlabInfos) {
 160       _cur_tlab_info = ptlabInfos;
 161     }
 162 
 163     void oops_do(OopClosure* f);
 164 
 165     void* operator new (size_t hdrSize, int numSlots, int bytesPerSaveArea) {
 166       assert(hdrSize <= hdr_size(), "");
 167       size_t totalSizeBytes = hdr_size()  + numSlots * (sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea);
 168       return NEW_C_HEAP_ARRAY(char, totalSizeBytes, mtInternal);
 169     }
 170 
 171     void operator delete (void* ptr) {
 172       FREE_C_HEAP_ARRAY(char, ptr, mtInternal);
 173     }
 174   };
 175 
 176 private:
 177 
 178   static JNINativeMethod HSAIL_methods[];
 179 
 180   // static native boolean initialize();
 181   JNIEXPORT static jboolean initialize(JNIEnv* env, jclass);
 182 
 183   // static native long generateKernel(byte[] targetCode, String name);
 184   JNIEXPORT static jlong generate_kernel(JNIEnv* env, jclass, jbyteArray code_handle, jstring name_handle);
 185 
 186   // static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args);
 187   JNIEXPORT static jboolean execute_kernel_void_1d(JNIEnv* env, jclass, jobject hotspotInstalledCode, jint dimX, jobject args,
 188                                                    jint num_tlabs, int allocBytesPerWorkitem, jobject oop_map_array);
 189 
 190   static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm,
 191                                                   jint num_tlabs, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS);
 192 
 193   static GraalEnv::CodeInstallResult install_code(Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle triggered_deoptimizations);
 194 
 195 public:
 196 
 197   // Registers the implementations for the native methods in HSAILHotSpotBackend
 198   static bool register_natives(JNIEnv* env);
 199 
 200   virtual const char* name() { return "HSAIL"; }
 201 
 202   virtual void notice_safepoints();
 203   virtual void ignore_safepoints();
 204 
 205 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
 206   typedef unsigned long long CUdeviceptr;
 207 #else
 208   typedef unsigned int CUdeviceptr;
 209 #endif
 210 
 211 private:
 212 
 213   /*
 214    * Kernel launch options from okra.h
 215    */
 216   typedef struct graal_okra_range_s {
 217     uint32_t dimension; //max value is 3
 218     uint32_t global_size[3];
 219     uint32_t group_size[3];
 220     uint32_t reserved; //For future use
 221   } graal_okra_range_t;
 222 
 223   typedef jint (*okra_get_context_func_t)(void**);
 224   typedef jint (*okra_create_kernel_func_t)(void*, unsigned char*, const char*, void**);
 225   typedef jint (*okra_push_pointer_func_t)(void*, void*);
 226   typedef jint (*okra_push_boolean_func_t)(void*, jboolean);
 227   typedef jint (*okra_push_byte_func_t)(void*, jbyte);
 228   typedef jint (*okra_push_double_func_t)(void*, jdouble);
 229   typedef jint (*okra_push_float_func_t)(void*, jfloat);
 230   typedef jint (*okra_push_int_func_t)(void*, jint);
 231   typedef jint (*okra_push_long_func_t)(void*, jlong);
 232   typedef jint (*okra_execute_kernel_func_t)(void*, void*, graal_okra_range_t*);
 233   typedef jint (*okra_clear_args_func_t)(void*);
 234   typedef jint (*okra_dispose_kernel_func_t)(void*);
 235   typedef jint (*okra_dispose_context_func_t)(void*);
 236 
 237 public:
 238   static okra_get_context_func_t                _okra_get_context;
 239   static okra_create_kernel_func_t              _okra_create_kernel;
 240   static okra_push_pointer_func_t               _okra_push_pointer;
 241   static okra_push_boolean_func_t               _okra_push_boolean;
 242   static okra_push_byte_func_t                  _okra_push_byte;
 243   static okra_push_double_func_t                _okra_push_double;
 244   static okra_push_float_func_t                 _okra_push_float;
 245   static okra_push_int_func_t                   _okra_push_int;
 246   static okra_push_long_func_t                  _okra_push_long;
 247   static okra_execute_kernel_func_t             _okra_execute_kernel;
 248   static okra_clear_args_func_t                 _okra_clear_args;
 249   static okra_dispose_kernel_func_t             _okra_dispose_kernel;
 250   static okra_dispose_context_func_t            _okra_dispose_context;
 251   
 252 protected:
 253   static void* _device_context;
 254 
 255   // true if safepoints are activated
 256   static jint _notice_safepoints;
 257 };
 258 #endif // GPU_HSAIL_VM_GPU_HSAIL_HPP