/*
 * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/g1/g1BufferNodeList.hpp"
#include "gc/g1/g1CardTableEntryClosure.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1FreeIdSet.hpp"
#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/workgroup.hpp"
#include "memory/iterator.hpp"
#include "runtime/flags/flagSetting.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadSMR.hpp"
#include "utilities/quickSort.hpp"

G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
  // Dirty card queues are always active, so we create them with their
  // active field set to true.
  PtrQueue(qset, true /* active */)
{ }

G1DirtyCardQueue::~G1DirtyCardQueue() {
  flush();
}

void G1DirtyCardQueue::handle_completed_buffer() {
  assert(_buf != NULL, "precondition");
  BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
  G1DirtyCardQueueSet* dcqs = dirty_card_qset();
  if (dcqs->process_or_enqueue_completed_buffer(node)) {
    reset(); // Buffer fully processed, reset index.
  } else {
    allocate_buffer(); // Buffer enqueued, get a new one.
  }
}

// Assumed to be zero by concurrent threads.
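// Mutator par ids are claimed from _free_ids, which hands out ids in
// [par_ids_start(), par_ids_start() + num_par_ids()); the per-id entries
// of _mutator_refined_cards_counters are indexed relative to this start
// (see mut_process_buffer()).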
static uint par_ids_start() { return 0; }

G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon,
                                         BufferNode::Allocator* allocator) :
  PtrQueueSet(allocator),
  _cbl_mon(cbl_mon),
  _completed_buffers_head(NULL),
  _completed_buffers_tail(NULL),
  _num_cards(0),
  _process_cards_threshold(ProcessCardsThresholdNever),
  _process_completed_buffers(false),
  _max_cards(MaxCardsUnlimited),
  _max_cards_padding(0),
  _free_ids(par_ids_start(), num_par_ids()),
  _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
{
  ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
  _all_active = true;
}

G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
  abandon_completed_buffers();
  FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
}

// Determines how many mutator threads can process the buffers in parallel.
uint G1DirtyCardQueueSet::num_par_ids() {
  return (uint)os::initial_active_processor_count();
}

size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
  size_t sum = 0;
  for (uint i = 0; i < num_par_ids(); ++i) {
    sum += _mutator_refined_cards_counters[i];
  }
  return sum;
}

void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
  G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
}

void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
  MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
  cbn->set_next(NULL);
  if (_completed_buffers_tail == NULL) {
    assert(_completed_buffers_head == NULL, "Well-formedness");
    _completed_buffers_head = cbn;
    _completed_buffers_tail = cbn;
  } else {
    _completed_buffers_tail->set_next(cbn);
    _completed_buffers_tail = cbn;
  }
  _num_cards += buffer_size() - cbn->index();

  if (!process_completed_buffers() &&
      (num_cards() > process_cards_threshold())) {
    set_process_completed_buffers(true);
    ml.notify_all();
  }
  verify_num_cards();
}

BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);

  if (num_cards() <= stop_at) {
    return NULL;
  }

  assert(num_cards() > 0, "invariant");
  assert(_completed_buffers_head != NULL, "invariant");
  assert(_completed_buffers_tail != NULL, "invariant");

  BufferNode* bn = _completed_buffers_head;
  _num_cards -= buffer_size() - bn->index();
  _completed_buffers_head = bn->next();
  if (_completed_buffers_head == NULL) {
    assert(num_cards() == 0, "invariant");
    _completed_buffers_tail = NULL;
    set_process_completed_buffers(false);
  }
  verify_num_cards();
  bn->set_next(NULL);
  return bn;
}

#ifdef ASSERT
void G1DirtyCardQueueSet::verify_num_cards() const {
  size_t actual = 0;
  BufferNode* cur = _completed_buffers_head;
  while (cur != NULL) {
    actual += buffer_size() - cur->index();
    cur = cur->next();
  }
  assert(actual == _num_cards,
         "Num entries in completed buffers should be " SIZE_FORMAT " but are " SIZE_FORMAT,
         _num_cards, actual);
}
#endif

void G1DirtyCardQueueSet::abandon_completed_buffers() {
  BufferNode* buffers_to_delete = NULL;
  {
    MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
    buffers_to_delete = _completed_buffers_head;
    _completed_buffers_head = NULL;
    _completed_buffers_tail = NULL;
    _num_cards = 0;
    set_process_completed_buffers(false);
  }
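  // Deallocate the detached buffers outside the critical section, so the
  // monitor is not held across the (potentially many) deallocations.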
  while (buffers_to_delete != NULL) {
    BufferNode* bn = buffers_to_delete;
    buffers_to_delete = bn->next();
    bn->set_next(NULL);
    deallocate_buffer(bn);
  }
}

void G1DirtyCardQueueSet::notify_if_necessary() {
  MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
  if (num_cards() > process_cards_threshold()) {
    set_process_completed_buffers(true);
    ml.notify_all();
  }
}

// Merge lists of buffers. Notify the processing threads.
// The source queue set is emptied as a result. The queue sets
// must share the monitor.
void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
  assert(allocator() == src->allocator(), "precondition");
  const G1BufferNodeList from = src->take_all_completed_buffers();
  if (from._head == NULL) return;

  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  if (_completed_buffers_tail == NULL) {
    assert(_completed_buffers_head == NULL, "Well-formedness");
    _completed_buffers_head = from._head;
    _completed_buffers_tail = from._tail;
  } else {
    assert(_completed_buffers_head != NULL, "Well-formedness");
    _completed_buffers_tail->set_next(from._head);
    _completed_buffers_tail = from._tail;
  }
  _num_cards += from._entry_count;

  assert((_completed_buffers_head == NULL && _completed_buffers_tail == NULL) ||
         (_completed_buffers_head != NULL && _completed_buffers_tail != NULL),
         "Sanity");
  verify_num_cards();
}

G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  G1BufferNodeList result(_completed_buffers_head, _completed_buffers_tail, _num_cards);
  _completed_buffers_head = NULL;
  _completed_buffers_tail = NULL;
  _num_cards = 0;
  return result;
}

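// Helper for refining the cards of a single buffer. refine() proceeds in
// three steps: clean_cards() filters out cards that need no refinement and
// compacts the remaining ones to the high end of the buffer; the survivors
// are sorted in decreasing address order; refine_cleaned_cards() then
// refines them, stopping early (and redirtying the unprocessed cards) if a
// safepoint is requested.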
class G1RefineBufferedCards : public StackObj {
  BufferNode* const _node;
  CardTable::CardValue** const _node_buffer;
  const size_t _node_buffer_size;
  const uint _worker_id;
  size_t* _total_refined_cards;
  G1RemSet* const _g1rs;

  static inline int compare_card(const CardTable::CardValue* p1,
                                 const CardTable::CardValue* p2) {
    return p2 - p1;
  }

  // Sorts the cards from start_index to _node_buffer_size in *decreasing*
  // address order. Tests showed that this order is preferable to not sorting
  // or increasing address order.
  void sort_cards(size_t start_index) {
    QuickSort::sort(&_node_buffer[start_index],
                    _node_buffer_size - start_index,
                    compare_card,
                    false);
  }

  // Returns the index to the first clean card in the buffer.
  size_t clean_cards() {
    const size_t start = _node->index();
    assert(start <= _node_buffer_size, "invariant");

    // Two-fingered compaction algorithm similar to the filtering mechanism in
    // SATBMarkQueue. The main difference is that clean_card_before_refine()
    // could change the buffer element in-place.
    // We don't check for SuspendibleThreadSet::should_yield(), because
    // cleaning and redirtying the cards is fast.
    CardTable::CardValue** src = &_node_buffer[start];
    CardTable::CardValue** dst = &_node_buffer[_node_buffer_size];
    assert(src <= dst, "invariant");
    for ( ; src < dst; ++src) {
      // Search low to high for a card to keep.
      if (_g1rs->clean_card_before_refine(src)) {
        // Found keeper. Search high to low for a card to discard.
        while (src < --dst) {
          if (!_g1rs->clean_card_before_refine(dst)) {
            *dst = *src; // Replace discard with keeper.
            break;
          }
        }
        // If discard search failed (src == dst), the outer loop will also end.
      }
    }

    // dst points to the first retained clean card, or the end of the buffer
    // if all the cards were discarded.
    const size_t first_clean = dst - _node_buffer;
    assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
    // Discarded cards are considered as refined.
    *_total_refined_cards += first_clean - start;
    return first_clean;
  }

  bool refine_cleaned_cards(size_t start_index) {
    bool result = true;
    size_t i = start_index;
    for ( ; i < _node_buffer_size; ++i) {
      if (SuspendibleThreadSet::should_yield()) {
        redirty_unrefined_cards(i);
        result = false;
        break;
      }
      _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
    }
    _node->set_index(i);
    *_total_refined_cards += i - start_index;
    return result;
  }

  void redirty_unrefined_cards(size_t start) {
    for ( ; start < _node_buffer_size; ++start) {
      *_node_buffer[start] = G1CardTable::dirty_card_val();
    }
  }

public:
  G1RefineBufferedCards(BufferNode* node,
                        size_t node_buffer_size,
                        uint worker_id,
                        size_t* total_refined_cards) :
    _node(node),
    _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
    _node_buffer_size(node_buffer_size),
    _worker_id(worker_id),
    _total_refined_cards(total_refined_cards),
    _g1rs(G1CollectedHeap::heap()->rem_set()) {}

  bool refine() {
    size_t first_clean_index = clean_cards();
    if (first_clean_index == _node_buffer_size) {
      _node->set_index(first_clean_index);
      return true;
    }
    // This fence serves two purposes. First, the cards must be cleaned
    // before processing the contents. Second, we can't proceed with
    // processing a region until after the read of the region's top in
    // collect_and_clean_cards(), for synchronization with possibly concurrent
    // humongous object allocation (see comment at the StoreStore fence before
    // setting the regions' tops in the humongous allocation path).
    // It's okay that reading the region's top and reading the region's type
    // are racy with respect to each other. We need both set, in any order, to
    // proceed.
    OrderAccess::fence();
    sort_cards(first_clean_index);
    return refine_cleaned_cards(first_clean_index);
  }
};

bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
                                        uint worker_id,
                                        size_t* total_refined_cards) {
  G1RefineBufferedCards buffered_cards(node,
                                       buffer_size(),
                                       worker_id,
                                       total_refined_cards);
  return buffered_cards.refine();
}

#ifndef ASSERT
#define assert_fully_consumed(node, buffer_size)
#else
#define assert_fully_consumed(node, buffer_size)                \
  do {                                                          \
    size_t _afc_index = (node)->index();                        \
    size_t _afc_size = (buffer_size);                           \
    assert(_afc_index == _afc_size,                             \
           "Buffer was not fully consumed as claimed: index: "  \
           SIZE_FORMAT ", size: " SIZE_FORMAT,                  \
           _afc_index, _afc_size);                              \
  } while (0)
#endif // ASSERT

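// Returns true if the buffer was processed in place, in which case the
// caller can reuse it; returns false if the buffer was enqueued for later
// refinement, in which case the caller must allocate a new one (see
// G1DirtyCardQueue::handle_completed_buffer()).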
bool G1DirtyCardQueueSet::process_or_enqueue_completed_buffer(BufferNode* node) {
  if (Thread::current()->is_Java_thread()) {
    // If the number of queued cards exceeds the limit, make this Java
    // thread do the processing itself. We don't lock to access the
    // card count or padding; it is fine to be imprecise here. The
    // addition of the padding could overflow, which is treated as unlimited.
    size_t limit = max_cards() + max_cards_padding();
    if ((num_cards() > limit) && (limit >= max_cards())) {
      if (mut_process_buffer(node)) {
        return true;
      }
    }
  }
  enqueue_completed_buffer(node);
  return false;
}

bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
  uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
  uint counter_index = worker_id - par_ids_start();
  size_t* counter = &_mutator_refined_cards_counters[counter_index];
  bool result = refine_buffer(node, worker_id, counter);
  _free_ids.release_par_id(worker_id); // release the id

  if (result) {
    assert_fully_consumed(node, buffer_size());
  }
  return result;
}

bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
                                                               size_t stop_at,
                                                               size_t* total_refined_cards) {
  BufferNode* node = get_completed_buffer(stop_at);
  if (node == NULL) {
    return false;
  } else if (refine_buffer(node, worker_id, total_refined_cards)) {
    assert_fully_consumed(node, buffer_size());
    // Done with fully processed buffer.
    deallocate_buffer(node);
    return true;
  } else {
    // Return partially processed buffer to the queue.
    enqueue_completed_buffer(node);
    return true;
  }
}

void G1DirtyCardQueueSet::abandon_logs() {
  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
  abandon_completed_buffers();

  // Since abandon is done only at safepoints, we can safely manipulate
  // these queues.
  struct AbandonThreadLogClosure : public ThreadClosure {
    virtual void do_thread(Thread* t) {
      G1ThreadLocalData::dirty_card_queue(t).reset();
    }
  } closure;
  Threads::threads_do(&closure);

  G1BarrierSet::shared_dirty_card_queue().reset();
}

void G1DirtyCardQueueSet::concatenate_logs() {
  // Iterate over all the threads; if we find a partial log, add it to
  // the global list of logs. Temporarily turn off the limit on the number
  // of outstanding buffers.
  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
  size_t old_limit = max_cards();
  set_max_cards(MaxCardsUnlimited);

  struct ConcatenateThreadLogClosure : public ThreadClosure {
    virtual void do_thread(Thread* t) {
      G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
      if (!dcq.is_empty()) {
        dcq.flush();
      }
    }
  } closure;
  Threads::threads_do(&closure);

  G1BarrierSet::shared_dirty_card_queue().flush();
  set_max_cards(old_limit);
}