1 /*
   2  * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/g1/g1BufferNodeList.hpp"
  27 #include "gc/g1/g1CardTableEntryClosure.hpp"
  28 #include "gc/g1/g1CollectedHeap.inline.hpp"
  29 #include "gc/g1/g1DirtyCardQueue.hpp"
  30 #include "gc/g1/g1FreeIdSet.hpp"
  31 #include "gc/g1/g1RedirtyCardsQueue.hpp"
  32 #include "gc/g1/g1RemSet.hpp"
  33 #include "gc/g1/g1ThreadLocalData.hpp"
  34 #include "gc/g1/heapRegionRemSet.hpp"
  35 #include "gc/shared/suspendibleThreadSet.hpp"
  36 #include "gc/shared/workgroup.hpp"
  37 #include "memory/iterator.hpp"
  38 #include "runtime/flags/flagSetting.hpp"
  39 #include "runtime/mutexLocker.hpp"
  40 #include "runtime/orderAccess.hpp"
  41 #include "runtime/os.hpp"
  42 #include "runtime/safepoint.hpp"
  43 #include "runtime/thread.inline.hpp"
  44 #include "runtime/threadSMR.hpp"
  45 #include "utilities/quickSort.hpp"
  46 
  47 G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
  48   // Dirty card queues are always active, so we create them with their
  49   // active field set to true.
  50   PtrQueue(qset, true /* active */)
  51 { }
  52 
  53 G1DirtyCardQueue::~G1DirtyCardQueue() {
  54   flush();
  55 }
  56 
  57 void G1DirtyCardQueue::handle_completed_buffer() {
  58   assert(_buf != NULL, "precondition");
  59   BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
  60   G1DirtyCardQueueSet* dcqs = dirty_card_qset();
  61   if (dcqs->process_or_enqueue_completed_buffer(node)) {
  62     reset();                    // Buffer fully processed, reset index.
  63   } else {
  64     allocate_buffer();          // Buffer enqueued, get a new one.
  65   }
  66 }
  67 
  68 // Assumed to be zero by concurrent threads.
  69 static uint par_ids_start() { return 0; }
  70 
  71 G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon,
  72                                          BufferNode::Allocator* allocator) :
  73   PtrQueueSet(allocator),
  74   _cbl_mon(cbl_mon),
  75   _completed_buffers_head(NULL),
  76   _completed_buffers_tail(NULL),
  77   _num_cards(0),
  78   _process_cards_threshold(ProcessCardsThresholdNever),
  79   _process_completed_buffers(false),
  80   _max_cards(MaxCardsUnlimited),
  81   _max_cards_padding(0),
  82   _free_ids(par_ids_start(), num_par_ids()),
  83   _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
  84 {
  85   ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
  86   _all_active = true;
  87 }
  88 
  89 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
  90   abandon_completed_buffers();
  91   FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
  92 }
  93 
  94 // Determines how many mutator threads can process the buffers in parallel.
  95 uint G1DirtyCardQueueSet::num_par_ids() {
  96   return (uint)os::initial_active_processor_count();
  97 }
  98 
  99 size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
 100   size_t sum = 0;
 101   for (uint i = 0; i < num_par_ids(); ++i) {
 102     sum += _mutator_refined_cards_counters[i];
 103   }
 104   return sum;
 105 }
 106 
 107 void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
 108   G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
 109 }
 110 
 111 void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
 112   MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
 113   cbn->set_next(NULL);
 114   if (_completed_buffers_tail == NULL) {
 115     assert(_completed_buffers_head == NULL, "Well-formedness");
 116     _completed_buffers_head = cbn;
 117     _completed_buffers_tail = cbn;
 118   } else {
 119     _completed_buffers_tail->set_next(cbn);
 120     _completed_buffers_tail = cbn;
 121   }
 122   _num_cards += buffer_size() - cbn->index();
 123 
 124   if (!process_completed_buffers() &&
 125       (num_cards() > process_cards_threshold())) {
 126     set_process_completed_buffers(true);
 127     ml.notify_all();
 128   }
 129   verify_num_cards();
 130 }
 131 
 132 BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
 133   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 134 
 135   if (num_cards() <= stop_at) {
 136     return NULL;
 137   }
 138 
 139   assert(num_cards() > 0, "invariant");
 140   assert(_completed_buffers_head != NULL, "invariant");
 141   assert(_completed_buffers_tail != NULL, "invariant");
 142 
 143   BufferNode* bn = _completed_buffers_head;
 144   _num_cards -= buffer_size() - bn->index();
 145   _completed_buffers_head = bn->next();
 146   if (_completed_buffers_head == NULL) {
 147     assert(num_cards() == 0, "invariant");
 148     _completed_buffers_tail = NULL;
 149     set_process_completed_buffers(false);
 150   }
 151   verify_num_cards();
 152   bn->set_next(NULL);
 153   return bn;
 154 }
 155 
 156 #ifdef ASSERT
 157 void G1DirtyCardQueueSet::verify_num_cards() const {
 158   size_t actual = 0;
 159   BufferNode* cur = _completed_buffers_head;
 160   while (cur != NULL) {
 161     actual += buffer_size() - cur->index();
 162     cur = cur->next();
 163   }
 164   assert(actual == _num_cards,
 165          "Num entries in completed buffers should be " SIZE_FORMAT " but are " SIZE_FORMAT,
 166          _num_cards, actual);
 167 }
 168 #endif
 169 
 170 void G1DirtyCardQueueSet::abandon_completed_buffers() {
 171   BufferNode* buffers_to_delete = NULL;
 172   {
 173     MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 174     buffers_to_delete = _completed_buffers_head;
 175     _completed_buffers_head = NULL;
 176     _completed_buffers_tail = NULL;
 177     _num_cards = 0;
 178     set_process_completed_buffers(false);
 179   }
 180   while (buffers_to_delete != NULL) {
 181     BufferNode* bn = buffers_to_delete;
 182     buffers_to_delete = bn->next();
 183     bn->set_next(NULL);
 184     deallocate_buffer(bn);
 185   }
 186 }
 187 
 188 void G1DirtyCardQueueSet::notify_if_necessary() {
 189   MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
 190   if (num_cards() > process_cards_threshold()) {
 191     set_process_completed_buffers(true);
 192     ml.notify_all();
 193   }
 194 }
 195 
 196 // Merge lists of buffers. Notify the processing threads.
 197 // The source queue is emptied as a result. The queues
 198 // must share the monitor.
 199 void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
 200   assert(allocator() == src->allocator(), "precondition");
 201   const G1BufferNodeList from = src->take_all_completed_buffers();
 202   if (from._head == NULL) return;
 203 
 204   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 205   if (_completed_buffers_tail == NULL) {
 206     assert(_completed_buffers_head == NULL, "Well-formedness");
 207     _completed_buffers_head = from._head;
 208     _completed_buffers_tail = from._tail;
 209   } else {
 210     assert(_completed_buffers_head != NULL, "Well formedness");
 211     _completed_buffers_tail->set_next(from._head);
 212     _completed_buffers_tail = from._tail;
 213   }
 214   _num_cards += from._entry_count;
 215 
 216   assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
 217          _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
 218          "Sanity");
 219   verify_num_cards();
 220 }
 221 
 222 G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
 223   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 224   G1BufferNodeList result(_completed_buffers_head, _completed_buffers_tail, _num_cards);
 225   _completed_buffers_head = NULL;
 226   _completed_buffers_tail = NULL;
 227   _num_cards = 0;
 228   return result;
 229 }
 230 
 231 class G1RefineBufferedCards : public StackObj {
 232   BufferNode* const _node;
 233   CardTable::CardValue** const _node_buffer;
 234   const size_t _node_buffer_size;
 235   const uint _worker_id;
 236   size_t* _total_refined_cards;
 237   G1RemSet* const _g1rs;
 238 
 239   static inline int compare_card(const CardTable::CardValue* p1,
 240                                  const CardTable::CardValue* p2) {
 241     return p2 - p1;
 242   }
 243 
 244   // Sorts the cards from start_index to _node_buffer_size in *decreasing*
 245   // address order. Tests showed that this order is preferable to not sorting
 246   // or increasing address order.
 247   void sort_cards(size_t start_index) {
 248     QuickSort::sort(&_node_buffer[start_index],
 249                     _node_buffer_size - start_index,
 250                     compare_card,
 251                     false);
 252   }
 253 
 254   // Returns the index to the first clean card in the buffer.
 255   size_t clean_cards() {
 256     const size_t start = _node->index();
 257     assert(start <= _node_buffer_size, "invariant");
 258 
 259     // Two-fingered compaction algorithm similar to the filtering mechanism in
 260     // SATBMarkQueue. The main difference is that clean_card_before_refine()
 261     // could change the buffer element in-place.
 262     // We don't check for SuspendibleThreadSet::should_yield(), because
 263     // cleaning and redirtying the cards is fast.
 264     CardTable::CardValue** src = &_node_buffer[start];
 265     CardTable::CardValue** dst = &_node_buffer[_node_buffer_size];
 266     assert(src <= dst, "invariant");
 267     for ( ; src < dst; ++src) {
 268       // Search low to high for a card to keep.
 269       if (_g1rs->clean_card_before_refine(src)) {
 270         // Found keeper.  Search high to low for a card to discard.
 271         while (src < --dst) {
 272           if (!_g1rs->clean_card_before_refine(dst)) {
 273             *dst = *src;         // Replace discard with keeper.
 274             break;
 275           }
 276         }
 277         // If discard search failed (src == dst), the outer loop will also end.
 278       }
 279     }
 280 
 281     // dst points to the first retained clean card, or the end of the buffer
 282     // if all the cards were discarded.
 283     const size_t first_clean = dst - _node_buffer;
 284     assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
 285     // Discarded cards are considered as refined.
 286     *_total_refined_cards += first_clean - start;
 287     return first_clean;
 288   }
 289 
 290   bool refine_cleaned_cards(size_t start_index) {
 291     bool result = true;
 292     size_t i = start_index;
 293     for ( ; i < _node_buffer_size; ++i) {
 294       if (SuspendibleThreadSet::should_yield()) {
 295         redirty_unrefined_cards(i);
 296         result = false;
 297         break;
 298       }
 299       _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
 300     }
 301     _node->set_index(i);
 302     *_total_refined_cards += i - start_index;
 303     return result;
 304   }
 305 
 306   void redirty_unrefined_cards(size_t start) {
 307     for ( ; start < _node_buffer_size; ++start) {
 308       *_node_buffer[start] = G1CardTable::dirty_card_val();
 309     }
 310   }
 311 
 312 public:
 313   G1RefineBufferedCards(BufferNode* node,
 314                         size_t node_buffer_size,
 315                         uint worker_id,
 316                         size_t* total_refined_cards) :
 317     _node(node),
 318     _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
 319     _node_buffer_size(node_buffer_size),
 320     _worker_id(worker_id),
 321     _total_refined_cards(total_refined_cards),
 322     _g1rs(G1CollectedHeap::heap()->rem_set()) {}
 323 
 324   bool refine() {
 325     size_t first_clean_index = clean_cards();
 326     if (first_clean_index == _node_buffer_size) {
 327       _node->set_index(first_clean_index);
 328       return true;
 329     }
 330     // This fence serves two purposes. First, the cards must be cleaned
 331     // before processing the contents. Second, we can't proceed with
 332     // processing a region until after the read of the region's top in
 333     // collect_and_clean_cards(), for synchronization with possibly concurrent
 334     // humongous object allocation (see comment at the StoreStore fence before
 335     // setting the regions' tops in humongous allocation path).
 336     // It's okay that reading region's top and reading region's type were racy
 337     // wrto each other. We need both set, in any order, to proceed.
 338     OrderAccess::fence();
 339     sort_cards(first_clean_index);
 340     return refine_cleaned_cards(first_clean_index);
 341   }
 342 };
 343 
 344 bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
 345                                         uint worker_id,
 346                                         size_t* total_refined_cards) {
 347   G1RefineBufferedCards buffered_cards(node,
 348                                        buffer_size(),
 349                                        worker_id,
 350                                        total_refined_cards);
 351   return buffered_cards.refine();
 352 }
 353 
 354 #ifndef ASSERT
 355 #define assert_fully_consumed(node, buffer_size)
 356 #else
 357 #define assert_fully_consumed(node, buffer_size)                \
 358   do {                                                          \
 359     size_t _afc_index = (node)->index();                        \
 360     size_t _afc_size = (buffer_size);                           \
 361     assert(_afc_index == _afc_size,                             \
 362            "Buffer was not fully consumed as claimed: index: "  \
 363            SIZE_FORMAT ", size: " SIZE_FORMAT,                  \
 364             _afc_index, _afc_size);                             \
 365   } while (0)
 366 #endif // ASSERT
 367 
 368 bool G1DirtyCardQueueSet::process_or_enqueue_completed_buffer(BufferNode* node) {
 369   if (Thread::current()->is_Java_thread()) {
 370     // If the number of buffers exceeds the limit, make this Java
 371     // thread do the processing itself.  We don't lock to access
 372     // buffer count or padding; it is fine to be imprecise here.  The
 373     // add of padding could overflow, which is treated as unlimited.
 374     size_t limit = max_cards() + max_cards_padding();
 375     if ((num_cards() > limit) && (limit >= max_cards())) {
 376       if (mut_process_buffer(node)) {
 377         return true;
 378       }
 379     }
 380   }
 381   enqueue_completed_buffer(node);
 382   return false;
 383 }
 384 
 385 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
 386   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
 387   uint counter_index = worker_id - par_ids_start();
 388   size_t* counter = &_mutator_refined_cards_counters[counter_index];
 389   bool result = refine_buffer(node, worker_id, counter);
 390   _free_ids.release_par_id(worker_id); // release the id
 391 
 392   if (result) {
 393     assert_fully_consumed(node, buffer_size());
 394   }
 395   return result;
 396 }
 397 
 398 bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
 399                                                                size_t stop_at,
 400                                                                size_t* total_refined_cards) {
 401   BufferNode* node = get_completed_buffer(stop_at);
 402   if (node == NULL) {
 403     return false;
 404   } else if (refine_buffer(node, worker_id, total_refined_cards)) {
 405     assert_fully_consumed(node, buffer_size());
 406     // Done with fully processed buffer.
 407     deallocate_buffer(node);
 408     return true;
 409   } else {
 410     // Return partially processed buffer to the queue.
 411     enqueue_completed_buffer(node);
 412     return true;
 413   }
 414 }
 415 
 416 void G1DirtyCardQueueSet::abandon_logs() {
 417   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
 418   abandon_completed_buffers();
 419 
 420   // Since abandon is done only at safepoints, we can safely manipulate
 421   // these queues.
 422   struct AbandonThreadLogClosure : public ThreadClosure {
 423     virtual void do_thread(Thread* t) {
 424       G1ThreadLocalData::dirty_card_queue(t).reset();
 425     }
 426   } closure;
 427   Threads::threads_do(&closure);
 428 
 429   G1BarrierSet::shared_dirty_card_queue().reset();
 430 }
 431 
 432 void G1DirtyCardQueueSet::concatenate_logs() {
 433   // Iterate over all the threads, if we find a partial log add it to
 434   // the global list of logs.  Temporarily turn off the limit on the number
 435   // of outstanding buffers.
 436   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
 437   size_t old_limit = max_cards();
 438   set_max_cards(MaxCardsUnlimited);
 439 
 440   struct ConcatenateThreadLogClosure : public ThreadClosure {
 441     virtual void do_thread(Thread* t) {
 442       G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
 443       if (!dcq.is_empty()) {
 444         dcq.flush();
 445       }
 446     }
 447   } closure;
 448   Threads::threads_do(&closure);
 449 
 450   G1BarrierSet::shared_dirty_card_queue().flush();
 451   set_max_cards(old_limit);
 452 }