New src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp

   1 /*
   2  * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/g1/g1BufferNodeList.hpp"
  27 #include "gc/g1/g1CardTableEntryClosure.hpp"
  28 #include "gc/g1/g1CollectedHeap.inline.hpp"
  29 #include "gc/g1/g1ConcurrentRefineThread.hpp"
  30 #include "gc/g1/g1DirtyCardQueue.hpp"
  31 #include "gc/g1/g1FreeIdSet.hpp"
  32 #include "gc/g1/g1RedirtyCardsQueue.hpp"
  33 #include "gc/g1/g1RemSet.hpp"
  34 #include "gc/g1/g1ThreadLocalData.hpp"
  35 #include "gc/g1/heapRegionRemSet.hpp"
  36 #include "gc/shared/suspendibleThreadSet.hpp"
  37 #include "memory/iterator.hpp"
  38 #include "runtime/atomic.hpp"
  39 #include "runtime/os.hpp"
  40 #include "runtime/safepoint.hpp"
  41 #include "runtime/thread.inline.hpp"
  42 #include "runtime/threadSMR.hpp"
  43 #include "utilities/globalCounter.inline.hpp"
  44 #include "utilities/macros.hpp"
  45 #include "utilities/quickSort.hpp"
  46 
  47 G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
  48   // Dirty card queues are always active, so we create them with their
  49   // active field set to true.
  50   PtrQueue(qset, true /* active */)
  51 { }
  52 
  53 G1DirtyCardQueue::~G1DirtyCardQueue() {
  54   flush();
  55 }
  56 
  57 void G1DirtyCardQueue::handle_completed_buffer() {
  58   assert(_buf != NULL, "precondition");
  59   BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
  60   G1DirtyCardQueueSet* dcqs = dirty_card_qset();
  61   if (dcqs->process_or_enqueue_completed_buffer(node)) {
  62     reset();                    // Buffer fully processed, reset index.
  63   } else {
  64     allocate_buffer();          // Buffer enqueued, get a new one.
  65   }
  66 }
  67 
  68 // Assumed to be zero by concurrent threads.
  69 static uint par_ids_start() { return 0; }
  70 
  71 G1DirtyCardQueueSet::G1DirtyCardQueueSet(BufferNode::Allocator* allocator) :
  72   PtrQueueSet(allocator),
  73   _primary_refinement_thread(NULL),
  74   _num_cards(0),
  75   _completed(),
  76   _paused(),
  77   _free_ids(par_ids_start(), num_par_ids()),
  78   _process_cards_threshold(ProcessCardsThresholdNever),
  79   _max_cards(MaxCardsUnlimited),
  80   _max_cards_padding(0),
  81   _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
  82 {
  83   ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
  84   _all_active = true;
  85 }
  86 
  87 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
  88   abandon_completed_buffers();
  89   FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
  90 }
  91 
  92 // Determines how many mutator threads can process the buffers in parallel.
  93 uint G1DirtyCardQueueSet::num_par_ids() {
  94   return (uint)os::initial_active_processor_count();
  95 }
  96 
  97 size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
  98   size_t sum = 0;
  99   for (uint i = 0; i < num_par_ids(); ++i) {
 100     sum += _mutator_refined_cards_counters[i];
 101   }
 102   return sum;
 103 }
 104 
 105 void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
 106   G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
 107 }
 108 
 109 #ifdef ASSERT
 110 G1DirtyCardQueueSet::Queue::~Queue() {
 111   assert(_head == NULL, "precondition");
 112   assert(_tail == NULL, "precondition");
 113 }
 114 #endif // ASSERT
 115 
 116 BufferNode* G1DirtyCardQueueSet::Queue::top() const {
 117   return Atomic::load(&_head);
 118 }
 119 
 120 // An append operation atomically exchanges the new tail with the queue tail.
 121 // It then sets the "next" value of the old tail to the head of the list being
 122 // appended; it is an invariant that the old tail's "next" value is NULL.
 123 // But if the old tail is NULL then the queue was empty.  In this case the
 124 // head of the list being appended is instead stored in the queue head; it is
 125 // an invariant that the queue head is NULL in this case.
 126 //
 127 // This means there is a period between the exchange and the old tail update
 128 // where the queue sequence is split into two parts, the list from the queue
 129 // head to the old tail, and the list being appended.  If there are concurrent
 130 // push/append operations, each may introduce another such segment.  But they
 131 // all eventually get resolved by their respective updates of their old tail's
 132 // "next" value.  This also means that pop operations must handle a buffer
 133 // with a NULL "next" value specially.
 134 //
 135 // A push operation is just a degenerate append, where the buffer being pushed
 136 // is both the head and the tail of the list being appended.
 137 void G1DirtyCardQueueSet::Queue::append(BufferNode& first, BufferNode& last) {
 138   assert(last.next() == NULL, "precondition");
 139   BufferNode* old_tail = Atomic::xchg(&_tail, &last);
 140   if (old_tail == NULL) {       // Was empty.
 141     Atomic::store(&_head, &first);
 142   } else {
 143     assert(old_tail->next() == NULL, "invariant");
 144     old_tail->set_next(&first);
 145   }
 146 }
 147 
 148 BufferNode* G1DirtyCardQueueSet::Queue::pop() {
 149   Thread* current_thread = Thread::current();
 150   while (true) {
 151     // Use a critical section per iteration, rather than over the whole
 152     // operation.  We're not guaranteed to make progress.  Lingering in one
 153     // CS could lead to excessive allocation of buffers, because the CS
 154     // blocks return of released buffers to the free list for reuse.
 155     GlobalCounter::CriticalSection cs(current_thread);
 156 
 157     BufferNode* result = Atomic::load_acquire(&_head);
 158     if (result == NULL) return NULL; // Queue is empty.
 159 
 160     BufferNode* next = Atomic::load_acquire(BufferNode::next_ptr(*result));
 161     if (next != NULL) {
 162       // The "usual" lock-free pop from the head of a singly linked list.
 163       if (result == Atomic::cmpxchg(&_head, result, next)) {
 164         // Former head successfully taken; it is not the last.
 165         assert(Atomic::load(&_tail) != result, "invariant");
 166         assert(result->next() != NULL, "invariant");
 167         result->set_next(NULL);
 168         return result;
 169       }
 170       // Lost the race; try again.
 171       continue;
 172     }
 173 
 174     // next is NULL.  This case is handled differently from the "usual"
 175     // lock-free pop from the head of a singly linked list.
 176 
 177     // If _tail == result then result is the only element in the list. We can
 178     // remove it from the list by first setting _tail to NULL and then setting
 179     // _head to NULL, the order being important.  We set _tail with cmpxchg in
 180     // case of a concurrent push/append/pop also changing _tail.  If we win
 181     // then we've claimed result.
 182     if (Atomic::cmpxchg(&_tail, result, (BufferNode*)NULL) == result) {
 183       assert(result->next() == NULL, "invariant");
 184       // Now that we've claimed result, also set _head to NULL.  But we must
 185       // be careful of a concurrent push/append after we NULLed _tail, since
 186       // it may have already performed its list-was-empty update of _head,
 187       // which we must not overwrite.
 188       Atomic::cmpxchg(&_head, result, (BufferNode*)NULL);
 189       return result;
 190     }
 191 
 192     // If _head != result then we lost the race to take result; try again.
 193     if (result != Atomic::load_acquire(&_head)) {
 194       continue;
 195     }
 196 
 197     // An in-progress concurrent operation interfered with taking the head
 198     // element when it was the only element.  A concurrent pop may have won
 199     // the race to clear the tail but not yet cleared the head. Alternatively,
 200     // a concurrent push/append may have changed the tail but not yet linked
 201     // result->next().  We cannot take result in either case.  We don't just
 202     // try again, because we could spin for a long time waiting for that
 203     // concurrent operation to finish.  In the first case, returning NULL is
 204     // fine; we lost the race for the only element to another thread.  We
 205     // also return NULL for the second case, and let the caller cope.
 206     return NULL;
 207   }
 208 }
 209 
 210 G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::Queue::take_all() {
 211   assert_at_safepoint();
 212   HeadTail result(Atomic::load(&_head), Atomic::load(&_tail));
 213   Atomic::store(&_head, (BufferNode*)NULL);
 214   Atomic::store(&_tail, (BufferNode*)NULL);
 215   return result;
 216 }
 217 
 218 void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
 219   assert(cbn != NULL, "precondition");
 220   // Increment _num_cards before adding to queue, so queue removal doesn't
 221   // need to deal with _num_cards possibly going negative.
 222   size_t new_num_cards = Atomic::add(&_num_cards, buffer_size() - cbn->index());
 223   _completed.push(*cbn);
 224   if ((new_num_cards > process_cards_threshold()) &&
 225       (_primary_refinement_thread != NULL)) {
 226     _primary_refinement_thread->activate();
 227   }
 228 }
 229 
 230 BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
 231   enqueue_previous_paused_buffers();
 232 
 233   // Check for insufficient cards to satisfy request.  We only do this once,
 234   // up front, rather than on each iteration below, since the test is racy
 235   // regardless of when we do it.
 236   if (Atomic::load_acquire(&_num_cards) <= stop_at) {
 237     return NULL;
 238   }
 239 
 240   BufferNode* result = _completed.pop();
 241   if (result != NULL) {
 242     Atomic::sub(&_num_cards, buffer_size() - result->index());
 243   }
 244   return result;
 245 }
 246 
 247 #ifdef ASSERT
 248 void G1DirtyCardQueueSet::verify_num_cards() const {
 249   size_t actual = 0;
 250   BufferNode* cur = _completed.top();
 251   for ( ; cur != NULL; cur = cur->next()) {
 252     actual += buffer_size() - cur->index();
 253   }
 254   assert(actual == Atomic::load(&_num_cards),
 255          "Num entries in completed buffers should be " SIZE_FORMAT " but are " SIZE_FORMAT,
 256          Atomic::load(&_num_cards), actual);
 257 }
 258 #endif // ASSERT
 259 
 260 G1DirtyCardQueueSet::PausedBuffers::PausedList::PausedList() :
 261   _head(NULL), _tail(NULL),
 262   _safepoint_id(SafepointSynchronize::safepoint_id())
 263 {}
 264 
 265 #ifdef ASSERT
 266 G1DirtyCardQueueSet::PausedBuffers::PausedList::~PausedList() {
 267   assert(Atomic::load(&_head) == NULL, "precondition");
 268   assert(_tail == NULL, "precondition");
 269 }
 270 #endif // ASSERT
 271 
 272 bool G1DirtyCardQueueSet::PausedBuffers::PausedList::is_next() const {
 273   assert_not_at_safepoint();
 274   return _safepoint_id == SafepointSynchronize::safepoint_id();
 275 }
 276 
 277 void G1DirtyCardQueueSet::PausedBuffers::PausedList::add(BufferNode* node) {
 278   assert_not_at_safepoint();
 279   assert(is_next(), "precondition");
 280   BufferNode* old_head = Atomic::xchg(&_head, node);
 281   if (old_head == NULL) {
 282     assert(_tail == NULL, "invariant");
 283     _tail = node;
 284   } else {
 285     node->set_next(old_head);
 286   }
 287 }
 288 
 289 G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::PausedList::take() {
 290   BufferNode* head = Atomic::load(&_head);
 291   BufferNode* tail = _tail;
 292   Atomic::store(&_head, (BufferNode*)NULL);
 293   _tail = NULL;
 294   return HeadTail(head, tail);
 295 }
 296 
 297 G1DirtyCardQueueSet::PausedBuffers::PausedBuffers() : _plist(NULL) {}
 298 
 299 #ifdef ASSERT
 300 G1DirtyCardQueueSet::PausedBuffers::~PausedBuffers() {
 301   assert(is_empty(), "invariant");
 302 }
 303 #endif // ASSERT
 304 
 305 bool G1DirtyCardQueueSet::PausedBuffers::is_empty() const {
 306   return Atomic::load(&_plist) == NULL;
 307 }
 308 
 309 void G1DirtyCardQueueSet::PausedBuffers::add(BufferNode* node) {
 310   assert_not_at_safepoint();
 311   PausedList* plist = Atomic::load_acquire(&_plist);
 312   if (plist != NULL) {
 313     // Already have a next list, so use it.  We know it's a next list because
 314     // of the precondition that take_previous() has already been called.
 315     assert(plist->is_next(), "invariant");
 316   } else {
 317     // Try to install a new next list.
 318     plist = new PausedList();
 319     PausedList* old_plist = Atomic::cmpxchg(&_plist, (PausedList*)NULL, plist);
 320     if (old_plist != NULL) {
 321       // Some other thread installed a new next list. Use it instead.
 322       delete plist;
 323       plist = old_plist;
 324     }
 325   }
 326   plist->add(node);
 327 }
 328 
 329 G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::take_previous() {
 330   assert_not_at_safepoint();
 331   PausedList* previous;
 332   {
 333     // Deal with plist in a critical section, to prevent it from being
 334     // deleted out from under us by a concurrent take_previous().
 335     GlobalCounter::CriticalSection cs(Thread::current());
 336     previous = Atomic::load_acquire(&_plist);
 337     if ((previous == NULL) ||   // Nothing to take.
 338         previous->is_next() ||  // Not from a previous safepoint.
 339         // Some other thread stole it.
 340         (Atomic::cmpxchg(&_plist, previous, (PausedList*)NULL) != previous)) {
 341       return HeadTail();
 342     }
 343   }
 344   // We now own previous.
 345   HeadTail result = previous->take();
 346   // There might be other threads examining previous (in concurrent
 347   // take_previous()).  Synchronize to wait until any such threads are
 348   // done with such examination before deleting.
 349   GlobalCounter::write_synchronize();
 350   delete previous;
 351   return result;
 352 }
 353 
 354 G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::take_all() {
 355   assert_at_safepoint();
 356   HeadTail result;
 357   PausedList* plist = Atomic::load(&_plist);
 358   if (plist != NULL) {
 359     Atomic::store(&_plist, (PausedList*)NULL);
 360     result = plist->take();
 361     delete plist;
 362   }
 363   return result;
 364 }
 365 
 366 void G1DirtyCardQueueSet::record_paused_buffer(BufferNode* node) {
 367   assert_not_at_safepoint();
 368   assert(node->next() == NULL, "precondition");
 369   // Cards for paused buffers are included in count, to contribute to
 370   // notification checking after the coming safepoint if it doesn't GC.
 371   // Note that this means the queue's _num_cards differs from the number
 372   // of cards in the queued buffers when there are paused buffers.
 373   Atomic::add(&_num_cards, buffer_size() - node->index());
 374   _paused.add(node);
 375 }
 376 
 377 void G1DirtyCardQueueSet::enqueue_paused_buffers_aux(const HeadTail& paused) {
 378   if (paused._head != NULL) {
 379     assert(paused._tail != NULL, "invariant");
 380     // Cards from paused buffers are already recorded in the queue count.
 381     _completed.append(*paused._head, *paused._tail);
 382   }
 383 }
 384 
 385 void G1DirtyCardQueueSet::enqueue_previous_paused_buffers() {
 386   assert_not_at_safepoint();
 387   // The fast-path still satisfies the precondition for record_paused_buffer
 388   // and PausedBuffers::add, even with a racy test.  If there are paused
 389   // buffers from a previous safepoint, is_empty() will return false; there
 390   // will have been a safepoint between recording and test, so there can't be
 391   // a false negative (is_empty() returns true) while such buffers are present.
 392   // If is_empty() is false, there are two cases:
 393   //
 394   // (1) There were paused buffers from a previous safepoint.  A concurrent
 395   // caller may take and enqueue them first, but that's okay; the precondition
 396   // for a possible later record_paused_buffer by this thread will still hold.
 397   //
 398   // (2) There are paused buffers for a requested next safepoint.
 399   //
 400   // In each of those cases some effort may be spent detecting and dealing
 401   // with those circumstances; any wasted effort in such cases is expected to
 402   // be well compensated by the fast path.
 403   if (!_paused.is_empty()) {
 404     enqueue_paused_buffers_aux(_paused.take_previous());
 405   }
 406 }
 407 
 408 void G1DirtyCardQueueSet::enqueue_all_paused_buffers() {
 409   assert_at_safepoint();
 410   enqueue_paused_buffers_aux(_paused.take_all());
 411 }
 412 
 413 void G1DirtyCardQueueSet::abandon_completed_buffers() {
 414   enqueue_all_paused_buffers();
 415   verify_num_cards();
 416   G1BufferNodeList list = take_all_completed_buffers();
 417   BufferNode* buffers_to_delete = list._head;
 418   while (buffers_to_delete != NULL) {
 419     BufferNode* bn = buffers_to_delete;
 420     buffers_to_delete = bn->next();
 421     bn->set_next(NULL);
 422     deallocate_buffer(bn);
 423   }
 424 }
 425 
 426 void G1DirtyCardQueueSet::notify_if_necessary() {
 427   if ((_primary_refinement_thread != NULL) &&
 428       (num_cards() > process_cards_threshold())) {
 429     _primary_refinement_thread->activate();
 430   }
 431 }
 432 
 433 // Merge lists of buffers. The source queue set is emptied as a
 434 // result. The queue sets must share the same allocator.
 435 void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
 436   assert(allocator() == src->allocator(), "precondition");
 437   const G1BufferNodeList from = src->take_all_completed_buffers();
 438   if (from._head != NULL) {
 439     Atomic::add(&_num_cards, from._entry_count);
 440     _completed.append(*from._head, *from._tail);
 441   }
 442 }
 443 
 444 G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
 445   enqueue_all_paused_buffers();
 446   verify_num_cards();
 447   HeadTail buffers = _completed.take_all();
 448   size_t num_cards = Atomic::load(&_num_cards);
 449   Atomic::store(&_num_cards, size_t(0));
 450   return G1BufferNodeList(buffers._head, buffers._tail, num_cards);
 451 }
 452 
 453 class G1RefineBufferedCards : public StackObj {
 454   BufferNode* const _node;
 455   CardTable::CardValue** const _node_buffer;
 456   const size_t _node_buffer_size;
 457   const uint _worker_id;
 458   size_t* _total_refined_cards;
 459   G1RemSet* const _g1rs;
 460 
 461   static inline int compare_card(const CardTable::CardValue* p1,
 462                                  const CardTable::CardValue* p2) {
 463     return p2 - p1;
 464   }
 465 
 466   // Sorts the cards from start_index to _node_buffer_size in *decreasing*
 467   // address order. Tests showed that this order is preferable to not sorting
 468   // or increasing address order.
 469   void sort_cards(size_t start_index) {
 470     QuickSort::sort(&_node_buffer[start_index],
 471                     _node_buffer_size - start_index,
 472                     compare_card,
 473                     false);
 474   }
 475 
 476   // Returns the index to the first clean card in the buffer.
 477   size_t clean_cards() {
 478     const size_t start = _node->index();
 479     assert(start <= _node_buffer_size, "invariant");
 480 
 481     // Two-fingered compaction algorithm similar to the filtering mechanism in
 482     // SATBMarkQueue. The main difference is that clean_card_before_refine()
 483     // could change the buffer element in-place.
 484     // We don't check for SuspendibleThreadSet::should_yield(), because
 485     // cleaning and redirtying the cards is fast.
 486     CardTable::CardValue** src = &_node_buffer[start];
 487     CardTable::CardValue** dst = &_node_buffer[_node_buffer_size];
 488     assert(src <= dst, "invariant");
 489     for ( ; src < dst; ++src) {
 490       // Search low to high for a card to keep.
 491       if (_g1rs->clean_card_before_refine(src)) {
 492         // Found keeper.  Search high to low for a card to discard.
 493         while (src < --dst) {
 494           if (!_g1rs->clean_card_before_refine(dst)) {
 495             *dst = *src;         // Replace discard with keeper.
 496             break;
 497           }
 498         }
 499         // If discard search failed (src == dst), the outer loop will also end.
 500       }
 501     }
 502 
 503     // dst points to the first retained clean card, or the end of the buffer
 504     // if all the cards were discarded.
 505     const size_t first_clean = dst - _node_buffer;
 506     assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
 507     // Discarded cards are considered as refined.
 508     *_total_refined_cards += first_clean - start;
 509     return first_clean;
 510   }
 511 
 512   bool refine_cleaned_cards(size_t start_index) {
 513     bool result = true;
 514     size_t i = start_index;
 515     for ( ; i < _node_buffer_size; ++i) {
 516       if (SuspendibleThreadSet::should_yield()) {
 517         redirty_unrefined_cards(i);
 518         result = false;
 519         break;
 520       }
 521       _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
 522     }
 523     _node->set_index(i);
 524     *_total_refined_cards += i - start_index;
 525     return result;
 526   }
 527 
 528   void redirty_unrefined_cards(size_t start) {
 529     for ( ; start < _node_buffer_size; ++start) {
 530       *_node_buffer[start] = G1CardTable::dirty_card_val();
 531     }
 532   }
 533 
 534 public:
 535   G1RefineBufferedCards(BufferNode* node,
 536                         size_t node_buffer_size,
 537                         uint worker_id,
 538                         size_t* total_refined_cards) :
 539     _node(node),
 540     _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
 541     _node_buffer_size(node_buffer_size),
 542     _worker_id(worker_id),
 543     _total_refined_cards(total_refined_cards),
 544     _g1rs(G1CollectedHeap::heap()->rem_set()) {}
 545 
 546   bool refine() {
 547     size_t first_clean_index = clean_cards();
 548     if (first_clean_index == _node_buffer_size) {
 549       _node->set_index(first_clean_index);
 550       return true;
 551     }
 552     // This fence serves two purposes. First, the cards must be cleaned
 553     // before processing the contents. Second, we can't proceed with
 554     // processing a region until after the read of the region's top in
 555     // collect_and_clean_cards(), for synchronization with possibly concurrent
 556     // humongous object allocation (see comment at the StoreStore fence before
 557     // setting the regions' tops in humongous allocation path).
 558     // It's okay that reading region's top and reading region's type were racy
 559     // wrto each other. We need both set, in any order, to proceed.
 560     OrderAccess::fence();
 561     sort_cards(first_clean_index);
 562     return refine_cleaned_cards(first_clean_index);
 563   }
 564 };
 565 
 566 bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
 567                                         uint worker_id,
 568                                         size_t* total_refined_cards) {
 569   G1RefineBufferedCards buffered_cards(node,
 570                                        buffer_size(),
 571                                        worker_id,
 572                                        total_refined_cards);
 573   return buffered_cards.refine();
 574 }
 575 
 576 #ifndef ASSERT
 577 #define assert_fully_consumed(node, buffer_size)
 578 #else
 579 #define assert_fully_consumed(node, buffer_size)                \
 580   do {                                                          \
 581     size_t _afc_index = (node)->index();                        \
 582     size_t _afc_size = (buffer_size);                           \
 583     assert(_afc_index == _afc_size,                             \
 584            "Buffer was not fully consumed as claimed: index: "  \
 585            SIZE_FORMAT ", size: " SIZE_FORMAT,                  \
 586             _afc_index, _afc_size);                             \
 587   } while (0)
 588 #endif // ASSERT
 589 
 590 bool G1DirtyCardQueueSet::process_or_enqueue_completed_buffer(BufferNode* node) {
 591   if (Thread::current()->is_Java_thread()) {
 592     // If the number of buffers exceeds the limit, make this Java
 593     // thread do the processing itself.  Calculation is racy but we
 594     // don't need precision here.  The add of padding could overflow,
 595     // which is treated as unlimited.
 596     size_t limit = max_cards() + max_cards_padding();
 597     if ((num_cards() > limit) && (limit >= max_cards())) {
 598       if (mut_process_buffer(node)) {
 599         return true;
 600       }
 601       // Buffer was incompletely processed because of a pending safepoint
 602       // request.  Unlike with refinement thread processing, for mutator
 603       // processing the buffer did not come from the completed buffer queue,
 604       // so it is okay to add it to the queue rather than to the paused set.
 605       // Indeed, it can't be added to the paused set because we didn't pass
 606       // through enqueue_previous_paused_buffers.
 607     }
 608   }
 609   enqueue_completed_buffer(node);
 610   return false;
 611 }
 612 
 613 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
 614   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
 615   uint counter_index = worker_id - par_ids_start();
 616   size_t* counter = &_mutator_refined_cards_counters[counter_index];
 617   bool result = refine_buffer(node, worker_id, counter);
 618   _free_ids.release_par_id(worker_id); // release the id
 619 
 620   if (result) {
 621     assert_fully_consumed(node, buffer_size());
 622   }
 623   return result;
 624 }
 625 
 626 bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
 627                                                                size_t stop_at,
 628                                                                size_t* total_refined_cards) {
 629   BufferNode* node = get_completed_buffer(stop_at);
 630   if (node == NULL) {
 631     return false;
 632   } else if (refine_buffer(node, worker_id, total_refined_cards)) {
 633     assert_fully_consumed(node, buffer_size());
 634     // Done with fully processed buffer.
 635     deallocate_buffer(node);
 636     return true;
 637   } else {
 638     // Buffer incompletely processed because there is a pending safepoint.
 639     // Record partially processed buffer, to be finished later.
 640     record_paused_buffer(node);
 641     return true;
 642   }
 643 }
 644 
 645 void G1DirtyCardQueueSet::abandon_logs() {
 646   assert_at_safepoint();
 647   abandon_completed_buffers();
 648 
 649   // Since abandon is done only at safepoints, we can safely manipulate
 650   // these queues.
 651   struct AbandonThreadLogClosure : public ThreadClosure {
 652     virtual void do_thread(Thread* t) {
 653       G1ThreadLocalData::dirty_card_queue(t).reset();
 654     }
 655   } closure;
 656   Threads::threads_do(&closure);
 657 
 658   G1BarrierSet::shared_dirty_card_queue().reset();
 659 }
 660 
 661 void G1DirtyCardQueueSet::concatenate_logs() {
 662   // Iterate over all the threads, if we find a partial log add it to
 663   // the global list of logs.  Temporarily turn off the limit on the number
 664   // of outstanding buffers.
 665   assert_at_safepoint();
 666   size_t old_limit = max_cards();
 667   set_max_cards(MaxCardsUnlimited);
 668 
 669   struct ConcatenateThreadLogClosure : public ThreadClosure {
 670     virtual void do_thread(Thread* t) {
 671       G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
 672       if (!dcq.is_empty()) {
 673         dcq.flush();
 674       }
 675     }
 676   } closure;
 677   Threads::threads_do(&closure);
 678 
 679   G1BarrierSet::shared_dirty_card_queue().flush();
 680   enqueue_all_paused_buffers();
 681   verify_num_cards();
 682   set_max_cards(old_limit);
 683 }