1 /* 2 * Copyright (c) 2016, 2019, Red Hat, Inc. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef SHARE_GC_SHENANDOAH_SHENANDOAHTASKQUEUE_HPP 26 #define SHARE_GC_SHENANDOAH_SHENANDOAHTASKQUEUE_HPP 27 28 #include "gc/shared/taskTerminator.hpp" 29 #include "gc/shared/taskqueue.hpp" 30 #include "memory/allocation.hpp" 31 #include "runtime/atomic.hpp" 32 #include "runtime/mutex.hpp" 33 #include "runtime/thread.hpp" 34 35 template<class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE> 36 class BufferedOverflowTaskQueue: public OverflowTaskQueue<E, F, N> 37 { 38 public: 39 typedef OverflowTaskQueue<E, F, N> taskqueue_t; 40 41 BufferedOverflowTaskQueue() : _buf_empty(true) {}; 42 43 TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;) 44 45 // Push task t into the queue. Returns true on success. 46 inline bool push(E t); 47 48 // Attempt to pop from the queue. Returns true on success. 49 inline bool pop(E &t); 50 51 inline void clear(); 52 53 inline bool is_empty() const { 54 return _buf_empty && taskqueue_t::is_empty(); 55 } 56 57 private: 58 bool _buf_empty; 59 E _elem; 60 }; 61 62 // ObjArrayChunkedTask 63 // 64 // Encodes both regular oops, and the array oops plus chunking data for parallel array processing. 65 // The design goal is to make the regular oop ops very fast, because that would be the prevailing 66 // case. On the other hand, it should not block parallel array processing from efficiently dividing 67 // the array work. 68 // 69 // The idea is to steal the bits from the 64-bit oop to encode array data, if needed. For the 70 // proper divide-and-conquer strategies, we want to encode the "blocking" data. It turns out, the 71 // most efficient way to do this is to encode the array block as (chunk * 2^pow), where it is assumed 72 // that the block has the size of 2^pow. This requires for pow to have only 5 bits (2^32) to encode 73 // all possible arrays. 74 // 75 // |---------oop---------|-pow-|--chunk---| 76 // 0 49 54 64 77 // 78 // By definition, chunk == 0 means "no chunk", i.e. chunking starts from 1. 79 // 80 // This encoding gives a few interesting benefits: 81 // 82 // a) Encoding/decoding regular oops is very simple, because the upper bits are zero in that task: 83 // 84 // |---------oop---------|00000|0000000000| // no chunk data 85 // 86 // This helps the most ubiquitous path. The initialization amounts to putting the oop into the word 87 // with zero padding. Testing for "chunkedness" is testing for zero with chunk mask. 88 // 89 // b) Splitting tasks for divide-and-conquer is possible. Suppose we have chunk <C, P> that covers 90 // interval [ (C-1)*2^P; C*2^P ). We can then split it into two chunks: 91 // <2*C - 1, P-1>, that covers interval [ (2*C - 2)*2^(P-1); (2*C - 1)*2^(P-1) ) 92 // <2*C, P-1>, that covers interval [ (2*C - 1)*2^(P-1); 2*C*2^(P-1) ) 93 // 94 // Observe that the union of these two intervals is: 95 // [ (2*C - 2)*2^(P-1); 2*C*2^(P-1) ) 96 // 97 // ...which is the original interval: 98 // [ (C-1)*2^P; C*2^P ) 99 // 100 // c) The divide-and-conquer strategy could even start with chunk <1, round-log2-len(arr)>, and split 101 // down in the parallel threads, which alleviates the upfront (serial) splitting costs. 102 // 103 // Encoding limitations caused by current bitscales mean: 104 // 10 bits for chunk: max 1024 blocks per array 105 // 5 bits for power: max 2^32 array 106 // 49 bits for oop: max 512 TB of addressable space 107 // 108 // Stealing bits from oop trims down the addressable space. Stealing too few bits for chunk ID limits 109 // potential parallelism. Stealing too few bits for pow limits the maximum array size that can be handled. 110 // In future, these might be rebalanced to favor one degree of freedom against another. For example, 111 // if/when Arrays 2.0 bring 2^64-sized arrays, we might need to steal another bit for power. We could regain 112 // some bits back if chunks are counted in ObjArrayMarkingStride units. 113 // 114 // There is also a fallback version that uses plain fields, when we don't have enough space to steal the 115 // bits from the native pointer. It is useful to debug the optimized version. 116 // 117 118 #ifdef _MSC_VER 119 #pragma warning(push) 120 // warning C4522: multiple assignment operators specified 121 #pragma warning( disable:4522 ) 122 #endif 123 124 #ifdef _LP64 125 #define SHENANDOAH_OPTIMIZED_OBJTASK 1 126 #else 127 #define SHENANDOAH_OPTIMIZED_OBJTASK 0 128 #endif 129 130 #if SHENANDOAH_OPTIMIZED_OBJTASK 131 class ObjArrayChunkedTask 132 { 133 public: 134 enum { 135 chunk_bits = 10, 136 pow_bits = 5, 137 oop_bits = sizeof(uintptr_t)*8 - chunk_bits - pow_bits 138 }; 139 enum { 140 oop_shift = 0, 141 pow_shift = oop_shift + oop_bits, 142 chunk_shift = pow_shift + pow_bits 143 }; 144 145 public: 146 ObjArrayChunkedTask(oop o = NULL) { 147 assert(decode_oop(encode_oop(o)) == o, "oop can be encoded: " PTR_FORMAT, p2i(o)); 148 _obj = encode_oop(o); 149 } 150 ObjArrayChunkedTask(oop o, int chunk, int pow) { 151 assert(decode_oop(encode_oop(o)) == o, "oop can be encoded: " PTR_FORMAT, p2i(o)); 152 assert(decode_chunk(encode_chunk(chunk)) == chunk, "chunk can be encoded: %d", chunk); 153 assert(decode_pow(encode_pow(pow)) == pow, "pow can be encoded: %d", pow); 154 _obj = encode_oop(o) | encode_chunk(chunk) | encode_pow(pow); 155 } 156 ObjArrayChunkedTask(const ObjArrayChunkedTask& t): _obj(t._obj) { } 157 158 ObjArrayChunkedTask& operator =(const ObjArrayChunkedTask& t) { 159 _obj = t._obj; 160 return *this; 161 } 162 volatile ObjArrayChunkedTask& 163 operator =(const volatile ObjArrayChunkedTask& t) volatile { 164 (void)const_cast<uintptr_t&>(_obj = t._obj); 165 return *this; 166 } 167 168 inline oop decode_oop(uintptr_t val) const { 169 return (oop) reinterpret_cast<void*>((val >> oop_shift) & right_n_bits(oop_bits)); 170 } 171 172 inline int decode_chunk(uintptr_t val) const { 173 return (int) ((val >> chunk_shift) & right_n_bits(chunk_bits)); 174 } 175 176 inline int decode_pow(uintptr_t val) const { 177 return (int) ((val >> pow_shift) & right_n_bits(pow_bits)); 178 } 179 180 inline uintptr_t encode_oop(oop obj) const { 181 return ((uintptr_t)(void*) obj) << oop_shift; 182 } 183 184 inline uintptr_t encode_chunk(int chunk) const { 185 return ((uintptr_t) chunk) << chunk_shift; 186 } 187 188 inline uintptr_t encode_pow(int pow) const { 189 return ((uintptr_t) pow) << pow_shift; 190 } 191 192 inline oop obj() const { return decode_oop(_obj); } 193 inline int chunk() const { return decode_chunk(_obj); } 194 inline int pow() const { return decode_pow(_obj); } 195 inline bool is_not_chunked() const { return (_obj & ~right_n_bits(oop_bits + pow_bits)) == 0; } 196 197 DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid. 198 199 static uintptr_t max_addressable() { 200 return nth_bit(oop_bits); 201 } 202 203 static int chunk_size() { 204 return nth_bit(chunk_bits); 205 } 206 207 private: 208 uintptr_t _obj; 209 }; 210 #else 211 class ObjArrayChunkedTask 212 { 213 public: 214 enum { 215 chunk_bits = 10, 216 pow_bits = 5, 217 }; 218 public: 219 ObjArrayChunkedTask(oop o = NULL, int chunk = 0, int pow = 0): _obj(o) { 220 assert(0 <= chunk && chunk < nth_bit(chunk_bits), "chunk is sane: %d", chunk); 221 assert(0 <= pow && pow < nth_bit(pow_bits), "pow is sane: %d", pow); 222 _chunk = chunk; 223 _pow = pow; 224 } 225 ObjArrayChunkedTask(const ObjArrayChunkedTask& t): _obj(t._obj), _chunk(t._chunk), _pow(t._pow) { } 226 227 ObjArrayChunkedTask& operator =(const ObjArrayChunkedTask& t) { 228 _obj = t._obj; 229 _chunk = t._chunk; 230 _pow = t._pow; 231 return *this; 232 } 233 volatile ObjArrayChunkedTask& 234 operator =(const volatile ObjArrayChunkedTask& t) volatile { 235 (void)const_cast<oop&>(_obj = t._obj); 236 _chunk = t._chunk; 237 _pow = t._pow; 238 return *this; 239 } 240 241 inline oop obj() const { return _obj; } 242 inline int chunk() const { return _chunk; } 243 inline int pow() const { return _pow; } 244 245 inline bool is_not_chunked() const { return _chunk == 0; } 246 247 DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid. 248 249 static size_t max_addressable() { 250 return sizeof(oop); 251 } 252 253 static int chunk_size() { 254 return nth_bit(chunk_bits); 255 } 256 257 private: 258 oop _obj; 259 int _chunk; 260 int _pow; 261 }; 262 #endif // SHENANDOAH_OPTIMIZED_OBJTASK 263 264 #ifdef _MSC_VER 265 #pragma warning(pop) 266 #endif 267 268 typedef ObjArrayChunkedTask ShenandoahMarkTask; 269 typedef BufferedOverflowTaskQueue<ShenandoahMarkTask, mtGC> ShenandoahBufferedOverflowTaskQueue; 270 typedef Padded<ShenandoahBufferedOverflowTaskQueue> ShenandoahObjToScanQueue; 271 272 template <class T, MEMFLAGS F> 273 class ParallelClaimableQueueSet: public GenericTaskQueueSet<T, F> { 274 private: 275 DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile jint)); 276 volatile jint _claimed_index; 277 DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, 0); 278 279 debug_only(uint _reserved; ) 280 281 public: 282 using GenericTaskQueueSet<T, F>::size; 283 284 public: 285 ParallelClaimableQueueSet(int n) : GenericTaskQueueSet<T, F>(n), _claimed_index(0) { 286 debug_only(_reserved = 0; ) 287 } 288 289 void clear_claimed() { _claimed_index = 0; } 290 T* claim_next(); 291 292 // reserve queues that not for parallel claiming 293 void reserve(uint n) { 294 assert(n <= size(), "Sanity"); 295 _claimed_index = (jint)n; 296 debug_only(_reserved = n;) 297 } 298 299 debug_only(uint get_reserved() const { return (uint)_reserved; }) 300 }; 301 302 template <class T, MEMFLAGS F> 303 T* ParallelClaimableQueueSet<T, F>::claim_next() { 304 jint size = (jint)GenericTaskQueueSet<T, F>::size(); 305 306 if (_claimed_index >= size) { 307 return NULL; 308 } 309 310 jint index = Atomic::add(&_claimed_index, 1); 311 312 if (index <= size) { 313 return GenericTaskQueueSet<T, F>::queue((uint)index - 1); 314 } else { 315 return NULL; 316 } 317 } 318 319 class ShenandoahObjToScanQueueSet: public ParallelClaimableQueueSet<ShenandoahObjToScanQueue, mtGC> { 320 public: 321 ShenandoahObjToScanQueueSet(int n) : ParallelClaimableQueueSet<ShenandoahObjToScanQueue, mtGC>(n) {} 322 323 bool is_empty(); 324 void clear(); 325 326 #if TASKQUEUE_STATS 327 static void print_taskqueue_stats_hdr(outputStream* const st); 328 void print_taskqueue_stats() const; 329 void reset_taskqueue_stats(); 330 #endif // TASKQUEUE_STATS 331 }; 332 333 class ShenandoahTerminatorTerminator : public TerminatorTerminator { 334 private: 335 ShenandoahHeap* _heap; 336 public: 337 ShenandoahTerminatorTerminator(ShenandoahHeap* const heap) : _heap(heap) { } 338 // return true, terminates immediately, even if there's remaining work left 339 virtual bool should_exit_termination() { return _heap->cancelled_gc(); } 340 }; 341 342 class ShenandoahTaskTerminator : public StackObj { 343 private: 344 TaskTerminator _terminator; 345 public: 346 ShenandoahTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set); 347 348 bool offer_termination(ShenandoahTerminatorTerminator* terminator) { 349 return _terminator.offer_termination(terminator); 350 } 351 352 void reset_for_reuse() { _terminator.reset_for_reuse(); } 353 bool offer_termination() { return offer_termination((ShenandoahTerminatorTerminator*)NULL); } 354 }; 355 356 #endif // SHARE_GC_SHENANDOAH_SHENANDOAHTASKQUEUE_HPP