/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc_implementation/g1/concurrentG1Refine.hpp"
#include "gc_implementation/g1/concurrentG1RefineThread.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "memory/space.inline.hpp"
#include "runtime/atomic.hpp"
#include "utilities/copy.hpp"

// Possible sizes for the card counts cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).
int ConcurrentG1Refine::_cc_cache_sizes[] = {
        16381,    32771,    76831,    150001,   307261,
       614563,  1228891,  2457733,   4915219,  9830479,
     19660831, 39321619, 78643219, 157286461,       -1
  };

ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  _n_periods(0),
  _threads(NULL), _n_threads(0)
{

  // Ergonomically select initial concurrent refinement parameters.
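  // The zones are expressed in numbers of completed update buffers:
  // below the green zone no concurrent refinement is done, between
  // the green and yellow zones the refinement threads are activated
  // one at a time, and once the red zone is reached mutator threads
  // process their own buffers instead of enqueueing them.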
  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
  }
  set_green_zone(G1ConcRefinementGreenZone);

  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
  }
  set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));

  if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
  }
  set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
  _n_worker_threads = thread_num();
  // We need one extra thread to do the young gen rset size sampling.
  _n_threads = _n_worker_threads + 1;
  reset_threshold_step();

  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
  ConcurrentG1RefineThread *next = NULL;
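  // Create the threads from last to first so that each thread can be
  // handed a pointer to its already-constructed successor; the chain
  // of 'next' links is used to activate additional threads as the
  // refinement workload grows.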
  for (int i = _n_threads - 1; i >= 0; i--) {
    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    assert(t != NULL, "Conc refine should have been created");
    assert(t->cg1r() == this, "Conc refine thread should refer to this");
    _threads[i] = t;
    next = t;
  }
}

void ConcurrentG1Refine::reset_threshold_step() {
  if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
  } else {
    _thread_threshold_step = G1ConcRefinementThresholdStep;
  }
}

int ConcurrentG1Refine::thread_num() {
  return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
}

void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();
    _max_n_card_counts =
      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);

    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_n_card_counts < max_card_num, "card_num representation");

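    // Find the first predefined size that can hold the desired
    // fraction of the maximum card count, then step back one entry
    // so that the initial cache starts just below the desired size.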
    int desired = _max_n_card_counts / InitialCacheFraction;
    for (_cache_size_index = 0;
              _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
    }
    _cache_size_index = MAX2(0, (_cache_size_index - 1));

    int initial_size = _cc_cache_sizes[_cache_size_index];
    if (initial_size < 0) initial_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);

    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);

    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}

void ConcurrentG1Refine::stop() {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->stop();
    }
  }
}

void ConcurrentG1Refine::reinitialize_threads() {
  reset_threshold_step();
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->initialize();
    }
  }
}

ConcurrentG1Refine::~ConcurrentG1Refine() {
  if (G1ConcRSLogCacheSize > 0) {
    assert(_card_counts != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
    assert(_card_epochs != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
    assert(_hot_cache != NULL, "Logic");
    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
  }
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      delete _threads[i];
    }
    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
  }
}

void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      tc->do_thread(_threads[i]);
    }
  }
}

bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  HeapRegion* r = _g1h->heap_region_containing(start);
  if (r != NULL && r->is_young()) {
    return true;
  }
  // Either the card is not associated with a heap region, or the
  // region it is associated with is not young.
  return false;
}

jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);
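  // The card number and the epoch are packed into a single 64-bit
  // value so that both fields can be inspected and replaced with a
  // single atomic cmpxchg in the loop below.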

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure
      // that there is only one winner that updates the cached
      // card_ptr value even though all the refine threads share
      // the same epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to caller
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry
      count_ptr->_count = 0; // initialize counter for new card num
      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache it could have been added to the free list and subsequently
      // added to the young list in the intervening time. See CR 6817995.
      // We do not deal with this case here - it will be handled in
      // HeapRegion::oops_on_card_seq_iterate_careful after it has been
      // determined that the region containing the card has been allocated
      // to, and it's safe to check the young type of the region.

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine old_card_ptr and card_ptr)
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}

jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");

  // We've just inserted a card pointer into the card count cache
  // and got back the card that we just inserted or (evicted) the
  // previous contents of that count slot.

  // The card we got back could be in a young region. When the
  // returned card (if evicted) was originally inserted, we had
  // determined that its containing region was not young. However
  // it is possible for the region to be freed during a cleanup
  // pause, then reallocated and tagged as young which will result
  // in the returned card residing in a young region.
  //
  // We do not deal with this case here - the change from non-young
  // to young could be observed at any time - it will be handled in
  // HeapRegion::oops_on_card_seq_iterate_careful after it has been
  // determined that the region containing the card has been allocated
  // to.

  // The card pointer we obtained from the card count cache is not
  // hot, so do not store it in the cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from the _card_counts cache is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
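  // The hot cache is a fixed-size circular buffer. If it is full,
  // the entry at _hot_cache_idx is evicted and handed back to the
  // caller for immediate refinement; the new hot card takes its place.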
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  // The card obtained from the hot card cache could be in a young
  // region. See above on how this can happen.

  return res;
}

void ConcurrentG1Refine::clean_up_cache(int worker_i,
                                        G1RemSet* g1rs,
                                        DirtyCardQueue* into_cset_dcq) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;
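  // Workers claim fixed-size chunks of the hot cache by advancing
  // _hot_cache_par_claimed_idx with a CAS, so the cache can be
  // drained in parallel without taking a lock.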

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) {
            // 'entry' contains references that point into the current
            // collection set. We need to record 'entry' in the DCQS
            // that's used for that purpose.
            //
            // The only time we care about recording cards that contain
            // references that point into the collection set is during
            // RSet updating while within an evacuation pause.
            // In this case worker_i should be the id of a GC worker thread
            assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
            assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "incorrect worker id");
            into_cset_dcq->enqueue(entry);
          }
        }
      }
    }
  }
}

void ConcurrentG1Refine::expand_card_count_cache() {
  if (_n_card_counts < _max_n_card_counts) {
    int new_idx = _cache_size_index+1;
    int new_size = _cc_cache_sizes[new_idx];
    if (new_size < 0) new_size = _max_n_card_counts;
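    // A negative entry marks the end of the predefined size table;
    // from then on grow directly to the maximum useful size.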

    // Make sure we don't go bigger than we will ever need
    new_size = MIN2((unsigned) new_size, _max_n_card_counts);

    // Expand the card count and card epoch tables
    if (new_size > (int)_n_card_counts) {
      // We can just free and allocate a new array as we're
      // not interested in preserving the contents
      assert(_card_counts != NULL, "Logic!");
      assert(_card_epochs != NULL, "Logic!");
      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
      _n_card_counts = new_size;
      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
      _cache_size_index = new_idx;
    }
  }
}

void ConcurrentG1Refine::clear_and_record_card_counts() {
  if (G1ConcRSLogCacheSize == 0) return;

#ifndef PRODUCT
  double start = os::elapsedTime();
#endif

  if (_expand_card_counts) {
    expand_card_count_cache();
    _expand_card_counts = false;
    // Only need to clear the epochs.
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
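    // Stale counts are harmless: add_card_count() only trusts a
    // count whose epoch entry matches the current period, and it
    // reinitializes the count when it installs a new epoch entry.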
  }

  int this_epoch = (int) _n_periods;
  assert((this_epoch+1) <= max_jint, "too many periods");
  // Update epoch
  _n_periods++;

#ifndef PRODUCT
  double elapsed = os::elapsedTime() - start;
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
#endif
}


void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
  for (int i = 0; i < _n_threads; ++i) {
    _threads[i]->print_on(st);
    st->cr();
  }
}