1 /* 2 * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
 *
 */

#include "precompiled.hpp"
#include "gc/g1/g1BufferNodeList.hpp"
#include "gc/g1/g1CardTableEntryClosure.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1FreeIdSet.hpp"
#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/workgroup.hpp"
#include "runtime/flags/flagSetting.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadSMR.hpp"
#include "utilities/quickSort.hpp"

G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
  // Dirty card queues are always active, so we create them with their
  // active field set to true.
  PtrQueue(qset, true /* active */)
{ }

// Flush any remaining entries to the qset before the queue goes away.
G1DirtyCardQueue::~G1DirtyCardQueue() {
  flush();
}

// Called when this queue's buffer is full.  Hands the buffer to the qset,
// which either refines it immediately (mutator refinement) or enqueues it
// for the concurrent refinement threads.
void G1DirtyCardQueue::handle_completed_buffer() {
  assert(_buf != NULL, "precondition");
  BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
  G1DirtyCardQueueSet* dcqs = dirty_card_qset();
  if (dcqs->process_or_enqueue_completed_buffer(node)) {
    reset(); // Buffer fully processed, reset index.
  } else {
    allocate_buffer(); // Buffer enqueued, get a new one.
  }
}

// Assumed to be zero by concurrent threads.
static uint par_ids_start() { return 0; }

G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon,
                                         BufferNode::Allocator* allocator) :
  PtrQueueSet(allocator),
  _cbl_mon(cbl_mon),
  _completed_buffers_head(NULL),
  _completed_buffers_tail(NULL),
  _num_cards(0),
  _process_cards_threshold(ProcessCardsThresholdNever),
  _process_completed_buffers(false),
  _max_cards(MaxCardsUnlimited),
  _max_cards_padding(0),
  _free_ids(par_ids_start(), num_par_ids()),
  _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
{
  // One refinement counter per mutator par id; start them all at zero.
  ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
  // Dirty card queues are always active (see G1DirtyCardQueue constructor).
  _all_active = true;
}

G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
  abandon_completed_buffers();
  FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
}

// Determines how many mutator threads can process the buffers in parallel.
uint G1DirtyCardQueueSet::num_par_ids() {
  return (uint)os::initial_active_processor_count();
}

// Sum of the per-par-id counters of cards refined directly by mutator
// threads (see mut_process_buffer).
size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
  size_t sum = 0;
  for (uint i = 0; i < num_par_ids(); ++i) {
    sum += _mutator_refined_cards_counters[i];
  }
  return sum;
}

void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
  G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
}

// Append cbn to the completed-buffer list under _cbl_mon, and wake up the
// processing threads if the pending card count just crossed the threshold.
void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
  MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
  cbn->set_next(NULL);
  if (_completed_buffers_tail == NULL) {
    assert(_completed_buffers_head == NULL, "Well-formedness");
    _completed_buffers_head = cbn;
    _completed_buffers_tail = cbn;
  } else {
    _completed_buffers_tail->set_next(cbn);
    _completed_buffers_tail = cbn;
  }
  // A buffer holds (buffer_size() - index) entries; index is the first
  // occupied slot.
  _num_cards += buffer_size() - cbn->index();

  if (!process_completed_buffers() &&
      (num_cards() > process_cards_threshold())) {
    set_process_completed_buffers(true);
    ml.notify_all();
  }
  verify_num_cards();
}

// Remove and return the head of the completed-buffer list, or NULL if the
// number of pending cards is at or below stop_at.
BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);

  if (num_cards() <= stop_at) {
    return NULL;
  }

  assert(num_cards() > 0, "invariant");
  assert(_completed_buffers_head != NULL, "invariant");
  assert(_completed_buffers_tail != NULL, "invariant");

  BufferNode* bn = _completed_buffers_head;
  _num_cards -= buffer_size() - bn->index();
  _completed_buffers_head = bn->next();
  if (_completed_buffers_head == NULL) {
    // List is now empty; stop signalling the processing threads.
    assert(num_cards() == 0, "invariant");
    _completed_buffers_tail = NULL;
    set_process_completed_buffers(false);
  }
  verify_num_cards();
  bn->set_next(NULL);
  return bn;
}

#ifdef ASSERT
// Recount the entries in the completed-buffer list and check the cached
// _num_cards matches.  Caller must hold _cbl_mon.
void G1DirtyCardQueueSet::verify_num_cards() const {
  size_t actual = 0;
  BufferNode* cur = _completed_buffers_head;
  while (cur != NULL) {
    actual += buffer_size() - cur->index();
    cur = cur->next();
  }
  assert(actual == _num_cards,
         "Num entries in completed buffers should be " SIZE_FORMAT " but are " SIZE_FORMAT,
         _num_cards, actual);
}
#endif

// Discard all completed buffers without refining them.  The list is
// detached under the lock; the (possibly slow) deallocation happens after
// the lock is released.
void G1DirtyCardQueueSet::abandon_completed_buffers() {
  BufferNode* buffers_to_delete = NULL;
  {
    MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
    buffers_to_delete = _completed_buffers_head;
    _completed_buffers_head = NULL;
    _completed_buffers_tail = NULL;
    _num_cards = 0;
    set_process_completed_buffers(false);
  }
  while (buffers_to_delete != NULL) {
    BufferNode* bn = buffers_to_delete;
    buffers_to_delete = bn->next();
    bn->set_next(NULL);
    deallocate_buffer(bn);
  }
}

// Wake up the processing threads if the pending card count exceeds the
// processing threshold.
void G1DirtyCardQueueSet::notify_if_necessary() {
  MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
  if (num_cards() > process_cards_threshold()) {
    set_process_completed_buffers(true);
    ml.notify_all();
  }
}

// Merge lists of buffers. Notify the processing threads.
// The source queue is emptied as a result. The queues
// must share the monitor.
void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
  assert(allocator() == src->allocator(), "precondition");
  const G1BufferNodeList from = src->take_all_completed_buffers();
  if (from._head == NULL) return;

  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  if (_completed_buffers_tail == NULL) {
    assert(_completed_buffers_head == NULL, "Well-formedness");
    _completed_buffers_head = from._head;
    _completed_buffers_tail = from._tail;
  } else {
    assert(_completed_buffers_head != NULL, "Well formedness");
    _completed_buffers_tail->set_next(from._head);
    _completed_buffers_tail = from._tail;
  }
  _num_cards += from._entry_count;

  assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
         _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
         "Sanity");
  verify_num_cards();
}

// Atomically detach the entire completed-buffer list, returning it (with
// its card count) and leaving this set empty.
G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  G1BufferNodeList result(_completed_buffers_head, _completed_buffers_tail, _num_cards);
  _completed_buffers_head = NULL;
  _completed_buffers_tail = NULL;
  _num_cards = 0;
  return result;
}

// Helper for refining the cards of a single buffer: cleans out already-clean
// cards, sorts the remainder, then refines them, yielding to safepoints
// between cards.
class G1RefineBufferedCards : public StackObj {
  BufferNode* const _node;
  CardTable::CardValue** const _node_buffer;
  const size_t _node_buffer_size;
  const uint _worker_id;
  size_t* _total_refined_cards;
  G1RemSet* const _g1rs;

  // Comparator yielding *decreasing* address order (see sort_cards).
  static inline int compare_card(const CardTable::CardValue* p1,
                                 const CardTable::CardValue* p2) {
    return p2 - p1;
  }

  // Sorts the cards from start_index to _node_buffer_size in *decreasing*
  // address order.  Tests showed that this order is preferable to not sorting
  // or increasing address order.
  void sort_cards(size_t start_index) {
    QuickSort::sort(&_node_buffer[start_index],
                    _node_buffer_size - start_index,
                    compare_card,
                    false);
  }

  // Returns the index to the first clean card in the buffer.
  size_t clean_cards() {
    const size_t start = _node->index();
    assert(start <= _node_buffer_size, "invariant");

    // Two-fingered compaction algorithm similar to the filtering mechanism in
    // SATBMarkQueue. The main difference is that clean_card_before_refine()
    // could change the buffer element in-place.
    // We don't check for SuspendibleThreadSet::should_yield(), because
    // cleaning and redirtying the cards is fast.
    CardTable::CardValue** src = &_node_buffer[start];
    CardTable::CardValue** dst = &_node_buffer[_node_buffer_size];
    assert(src <= dst, "invariant");
    for ( ; src < dst; ++src) {
      // Search low to high for a card to keep.
      if (_g1rs->clean_card_before_refine(src)) {
        // Found keeper.  Search high to low for a card to discard.
        while (src < --dst) {
          if (!_g1rs->clean_card_before_refine(dst)) {
            *dst = *src;         // Replace discard with keeper.
            break;
          }
        }
        // If discard search failed (src == dst), the outer loop will also end.
      }
    }

    // dst points to the first retained clean card, or the end of the buffer
    // if all the cards were discarded.
    const size_t first_clean = dst - _node_buffer;
    assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
    // Discarded cards are considered as refined.
    *_total_refined_cards += first_clean - start;
    return first_clean;
  }

  // Refine cards [start_index, _node_buffer_size), stopping early if a
  // safepoint is requested.  Returns true if the buffer was fully processed;
  // otherwise records the resume point in the node's index and redirties the
  // unprocessed cards.
  bool refine_cleaned_cards(size_t start_index) {
    bool result = true;
    size_t i = start_index;
    for ( ; i < _node_buffer_size; ++i) {
      if (SuspendibleThreadSet::should_yield()) {
        redirty_unrefined_cards(i);
        result = false;
        break;
      }
      _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
    }
    _node->set_index(i);
    *_total_refined_cards += i - start_index;
    return result;
  }

  // Mark cards [start, _node_buffer_size) dirty again so they are
  // reprocessed when refinement of this buffer resumes.
  void redirty_unrefined_cards(size_t start) {
    for ( ; start < _node_buffer_size; ++start) {
      *_node_buffer[start] = G1CardTable::dirty_card_val();
    }
  }

public:
  G1RefineBufferedCards(BufferNode* node,
                        size_t node_buffer_size,
                        uint worker_id,
                        size_t* total_refined_cards) :
    _node(node),
    _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
    _node_buffer_size(node_buffer_size),
    _worker_id(worker_id),
    _total_refined_cards(total_refined_cards),
    _g1rs(G1CollectedHeap::heap()->rem_set()) {}

  // Process the buffer; returns true iff it was fully consumed.
  bool refine() {
    size_t first_clean_index = clean_cards();
    if (first_clean_index == _node_buffer_size) {
      _node->set_index(first_clean_index);
      return true;
    }
    // This fence serves two purposes. First, the cards must be cleaned
    // before processing the contents. Second, we can't proceed with
    // processing a region until after the read of the region's top in
    // collect_and_clean_cards(), for synchronization with possibly concurrent
    // humongous object allocation (see comment at the StoreStore fence before
    // setting the regions' tops in humongous allocation path).
    // It's okay that reading region's top and reading region's type were racy
    // with respect to each other. We need both set, in any order, to proceed.
    OrderAccess::fence();
    sort_cards(first_clean_index);
    return refine_cleaned_cards(first_clean_index);
  }
};

// Refine the cards in node, accumulating the number of refined cards into
// *total_refined_cards.  Returns true iff the buffer was fully consumed.
bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
                                        uint worker_id,
                                        size_t* total_refined_cards) {
  G1RefineBufferedCards buffered_cards(node,
                                       buffer_size(),
                                       worker_id,
                                       total_refined_cards);
  return buffered_cards.refine();
}

#ifndef ASSERT
#define assert_fully_consumed(node, buffer_size)
#else
#define assert_fully_consumed(node, buffer_size)                \
  do {                                                          \
    size_t _afc_index = (node)->index();                        \
    size_t _afc_size = (buffer_size);                           \
    assert(_afc_index == _afc_size,                             \
           "Buffer was not fully consumed as claimed: index: "  \
           SIZE_FORMAT ", size: " SIZE_FORMAT,                  \
           _afc_index, _afc_size);                              \
  } while (0)
#endif // ASSERT

// Returns true if the buffer was refined in place (caller can reuse it),
// false if it was enqueued for concurrent refinement.
bool G1DirtyCardQueueSet::process_or_enqueue_completed_buffer(BufferNode* node) {
  if (Thread::current()->is_Java_thread()) {
    // If the number of buffers exceeds the limit, make this Java
    // thread do the processing itself. We don't lock to access
    // buffer count or padding; it is fine to be imprecise here. The
    // add of padding could overflow, which is treated as unlimited.
    size_t limit = max_cards() + max_cards_padding();
    if ((num_cards() > limit) && (limit >= max_cards())) {
      if (mut_process_buffer(node)) {
        return true;
      }
    }
  }
  enqueue_completed_buffer(node);
  return false;
}

// Refine a buffer directly on a mutator thread, using a temporarily claimed
// par id for the worker id and its matching refinement counter.
bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
  uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
  uint counter_index = worker_id - par_ids_start();
  size_t* counter = &_mutator_refined_cards_counters[counter_index];
  bool result = refine_buffer(node, worker_id, counter);
  _free_ids.release_par_id(worker_id); // release the id

  if (result) {
    assert_fully_consumed(node, buffer_size());
  }
  return result;
}

// Refine one completed buffer if more than stop_at cards are pending.
// Returns false if there was no buffer to process; otherwise true, with the
// buffer either deallocated (fully processed) or re-enqueued (partially
// processed due to a pending safepoint).
bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
                                                               size_t stop_at,
                                                               size_t* total_refined_cards) {
  BufferNode* node = get_completed_buffer(stop_at);
  if (node == NULL) {
    return false;
  } else if (refine_buffer(node, worker_id, total_refined_cards)) {
    assert_fully_consumed(node, buffer_size());
    // Done with fully processed buffer.
    deallocate_buffer(node);
    return true;
  } else {
    // Return partially processed buffer to the queue.
    enqueue_completed_buffer(node);
    return true;
  }
}

// Discard all pending cards: the completed buffers, every thread's local
// queue, and the shared queue.  Only called at a safepoint.
void G1DirtyCardQueueSet::abandon_logs() {
  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
  abandon_completed_buffers();

  // Since abandon is done only at safepoints, we can safely manipulate
  // these queues.
  struct AbandonThreadLogClosure : public ThreadClosure {
    virtual void do_thread(Thread* t) {
      G1ThreadLocalData::dirty_card_queue(t).reset();
    }
  } closure;
  Threads::threads_do(&closure);

  G1BarrierSet::shared_dirty_card_queue().reset();
}

void G1DirtyCardQueueSet::concatenate_logs() {
  // Iterate over all the threads, if we find a partial log add it to
  // the global list of logs. Temporarily turn off the limit on the number
  // of outstanding buffers.
  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
  size_t old_limit = max_cards();
  set_max_cards(MaxCardsUnlimited);

  struct ConcatenateThreadLogClosure : public ThreadClosure {
    virtual void do_thread(Thread* t) {
      G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
      if (!dcq.is_empty()) {
        dcq.flush();
      }
    }
  } closure;
  Threads::threads_do(&closure);

  G1BarrierSet::shared_dirty_card_queue().flush();
  set_max_cards(old_limit);
}