1 /* 2 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "gc_implementation/g1/concurrentG1Refine.hpp" 27 #include "gc_implementation/g1/concurrentG1RefineThread.hpp" 28 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 29 #include "gc_implementation/g1/g1CollectorPolicy.hpp" 30 #include "gc_implementation/g1/g1RemSet.hpp" 31 #include "gc_implementation/g1/heapRegionSeq.inline.hpp" 32 #include "memory/space.inline.hpp" 33 #include "runtime/atomic.hpp" 34 #include "utilities/copy.hpp" 35 36 // Possible sizes for the card counts cache: odd primes that roughly double in size. 37 // (See jvmtiTagMap.cpp). 38 int ConcurrentG1Refine::_cc_cache_sizes[] = { 39 16381, 32771, 76831, 150001, 307261, 40 614563, 1228891, 2457733, 4915219, 9830479, 41 19660831, 39321619, 78643219, 157286461, -1 42 }; 43 44 ConcurrentG1Refine::ConcurrentG1Refine() : 45 _card_counts(NULL), _card_epochs(NULL), 46 _n_card_counts(0), _max_n_card_counts(0), 47 _cache_size_index(0), _expand_card_counts(false), 48 _hot_cache(NULL), 49 _def_use_cache(false), _use_cache(false), 50 _n_periods(0), 51 _threads(NULL), _n_threads(0) 52 { 53 54 // Ergomonically select initial concurrent refinement parameters 55 if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) { 56 FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1)); 57 } 58 set_green_zone(G1ConcRefinementGreenZone); 59 60 if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) { 61 FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3); 62 } 63 set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone())); 64 65 if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) { 66 FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2); 67 } 68 set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone())); 69 _n_worker_threads = thread_num(); 70 // We need one extra thread to do the young gen rset size sampling. 71 _n_threads = _n_worker_threads + 1; 72 reset_threshold_step(); 73 74 _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); 75 int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); 76 ConcurrentG1RefineThread *next = NULL; 77 for (int i = _n_threads - 1; i >= 0; i--) { 78 ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); 79 assert(t != NULL, "Conc refine should have been created"); 80 assert(t->cg1r() == this, "Conc refine thread should refer to this"); 81 _threads[i] = t; 82 next = t; 83 } 84 } 85 86 void ConcurrentG1Refine::reset_threshold_step() { 87 if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) { 88 _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1); 89 } else { 90 _thread_threshold_step = G1ConcRefinementThresholdStep; 91 } 92 } 93 94 int ConcurrentG1Refine::thread_num() { 95 return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1); 96 } 97 98 void ConcurrentG1Refine::init() { 99 if (G1ConcRSLogCacheSize > 0) { 100 _g1h = G1CollectedHeap::heap(); 101 _max_n_card_counts = 102 (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); 103 104 size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1; 105 guarantee(_max_n_card_counts < max_card_num, "card_num representation"); 106 107 int desired = _max_n_card_counts / InitialCacheFraction; 108 for (_cache_size_index = 0; 109 _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) { 110 if (_cc_cache_sizes[_cache_size_index] >= desired) break; 111 } 112 _cache_size_index = MAX2(0, (_cache_size_index - 1)); 113 114 int initial_size = _cc_cache_sizes[_cache_size_index]; 115 if (initial_size < 0) initial_size = _max_n_card_counts; 116 117 // Make sure we don't go bigger than we will ever need 118 _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts); 119 120 _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts); 121 _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts); 122 123 Copy::fill_to_bytes(&_card_counts[0], 124 _n_card_counts * sizeof(CardCountCacheEntry)); 125 Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); 126 127 ModRefBarrierSet* bs = _g1h->mr_bs(); 128 guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); 129 _ct_bs = (CardTableModRefBS*)bs; 130 _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start()); 131 132 _def_use_cache = true; 133 _use_cache = true; 134 _hot_cache_size = (1 << G1ConcRSLogCacheSize); 135 _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); 136 _n_hot = 0; 137 _hot_cache_idx = 0; 138 139 // For refining the cards in the hot cache in parallel 140 int n_workers = (ParallelGCThreads > 0 ? 141 _g1h->workers()->total_workers() : 1); 142 _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers); 143 _hot_cache_par_claimed_idx = 0; 144 } 145 } 146 147 void ConcurrentG1Refine::stop() { 148 if (_threads != NULL) { 149 for (int i = 0; i < _n_threads; i++) { 150 _threads[i]->stop(); 151 } 152 } 153 } 154 155 void ConcurrentG1Refine::reinitialize_threads() { 156 reset_threshold_step(); 157 if (_threads != NULL) { 158 for (int i = 0; i < _n_threads; i++) { 159 _threads[i]->initialize(); 160 } 161 } 162 } 163 164 ConcurrentG1Refine::~ConcurrentG1Refine() { 165 if (G1ConcRSLogCacheSize > 0) { 166 assert(_card_counts != NULL, "Logic"); 167 FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts); 168 assert(_card_epochs != NULL, "Logic"); 169 FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs); 170 assert(_hot_cache != NULL, "Logic"); 171 FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); 172 } 173 if (_threads != NULL) { 174 for (int i = 0; i < _n_threads; i++) { 175 delete _threads[i]; 176 } 177 FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads); 178 } 179 } 180 181 void ConcurrentG1Refine::threads_do(ThreadClosure *tc) { 182 if (_threads != NULL) { 183 for (int i = 0; i < _n_threads; i++) { 184 tc->do_thread(_threads[i]); 185 } 186 } 187 } 188 189 bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) { 190 HeapWord* start = _ct_bs->addr_for(card_ptr); 191 HeapRegion* r = _g1h->heap_region_containing(start); 192 if (r != NULL && r->is_young()) { 193 return true; 194 } 195 // This card is not associated with a heap region 196 // so can't be young. 197 return false; 198 } 199 200 jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) { 201 unsigned new_card_num = ptr_2_card_num(card_ptr); 202 unsigned bucket = hash(new_card_num); 203 assert(0 <= bucket && bucket < _n_card_counts, "Bounds"); 204 205 CardCountCacheEntry* count_ptr = &_card_counts[bucket]; 206 CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket]; 207 208 // We have to construct a new entry if we haven't updated the counts 209 // during the current period, or if the count was updated for a 210 // different card number. 211 unsigned int new_epoch = (unsigned int) _n_periods; 212 julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch); 213 214 while (true) { 215 // Fetch the previous epoch value 216 julong prev_epoch_entry = epoch_ptr->_value; 217 julong cas_res; 218 219 if (extract_epoch(prev_epoch_entry) != new_epoch) { 220 // This entry has not yet been updated during this period. 221 // Note: we update the epoch value atomically to ensure 222 // that there is only one winner that updates the cached 223 // card_ptr value even though all the refine threads share 224 // the same epoch value. 225 226 cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, 227 (volatile jlong*)&epoch_ptr->_value, 228 (jlong) prev_epoch_entry); 229 230 if (cas_res == prev_epoch_entry) { 231 // We have successfully won the race to update the 232 // epoch and card_num value. Make it look like the 233 // count and eviction count were previously cleared. 234 count_ptr->_count = 1; 235 count_ptr->_evict_count = 0; 236 *count = 0; 237 // We can defer the processing of card_ptr 238 *defer = true; 239 return card_ptr; 240 } 241 // We did not win the race to update the epoch field, so some other 242 // thread must have done it. The value that gets returned by CAS 243 // should be the new epoch value. 244 assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch"); 245 // We could 'continue' here or just re-read the previous epoch value 246 prev_epoch_entry = epoch_ptr->_value; 247 } 248 249 // The epoch entry for card_ptr has been updated during this period. 250 unsigned old_card_num = extract_card_num(prev_epoch_entry); 251 252 // The card count that will be returned to caller 253 *count = count_ptr->_count; 254 255 // Are we updating the count for the same card? 256 if (new_card_num == old_card_num) { 257 // Same card - just update the count. We could have more than one 258 // thread racing to update count for the current card. It should be 259 // OK not to use a CAS as the only penalty should be some missed 260 // increments of the count which delays identifying the card as "hot". 261 262 if (*count < max_jubyte) count_ptr->_count++; 263 // We can defer the processing of card_ptr 264 *defer = true; 265 return card_ptr; 266 } 267 268 // Different card - evict old card info 269 if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++; 270 if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) { 271 // Trigger a resize the next time we clear 272 _expand_card_counts = true; 273 } 274 275 cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, 276 (volatile jlong*)&epoch_ptr->_value, 277 (jlong) prev_epoch_entry); 278 279 if (cas_res == prev_epoch_entry) { 280 // We successfully updated the card num value in the epoch entry 281 count_ptr->_count = 0; // initialize counter for new card num 282 jbyte* old_card_ptr = card_num_2_ptr(old_card_num); 283 284 // Even though the region containg the card at old_card_num was not 285 // in the young list when old_card_num was recorded in the epoch 286 // cache it could have been added to the free list and subsequently 287 // added to the young list in the intervening time. See CR 6817995. 288 // We do not deal with this case here - it will be handled in 289 // HeapRegion::oops_on_card_seq_iterate_careful after it has been 290 // determined that the region containing the card has been allocated 291 // to, and it's safe to check the young type of the region. 292 293 // We do not want to defer processing of card_ptr in this case 294 // (we need to refine old_card_ptr and card_ptr) 295 *defer = false; 296 return old_card_ptr; 297 } 298 // Someone else beat us - try again. 299 } 300 } 301 302 jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) { 303 int count; 304 jbyte* cached_ptr = add_card_count(card_ptr, &count, defer); 305 assert(cached_ptr != NULL, "bad cached card ptr"); 306 307 // We've just inserted a card pointer into the card count cache 308 // and got back the card that we just inserted or (evicted) the 309 // previous contents of that count slot. 310 311 // The card we got back could be in a young region. When the 312 // returned card (if evicted) was originally inserted, we had 313 // determined that its containing region was not young. However 314 // it is possible for the region to be freed during a cleanup 315 // pause, then reallocated and tagged as young which will result 316 // in the returned card residing in a young region. 317 // 318 // We do not deal with this case here - the change from non-young 319 // to young could be observed at any time - it will be handled in 320 // HeapRegion::oops_on_card_seq_iterate_careful after it has been 321 // determined that the region containing the card has been allocated 322 // to. 323 324 // The card pointer we obtained from card count cache is not hot 325 // so do not store it in the cache; return it for immediate 326 // refining. 327 if (count < G1ConcRSHotCardLimit) { 328 return cached_ptr; 329 } 330 331 // Otherwise, the pointer we got from the _card_counts cache is hot. 332 jbyte* res = NULL; 333 MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); 334 if (_n_hot == _hot_cache_size) { 335 res = _hot_cache[_hot_cache_idx]; 336 _n_hot--; 337 } 338 // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. 339 _hot_cache[_hot_cache_idx] = cached_ptr; 340 _hot_cache_idx++; 341 if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; 342 _n_hot++; 343 344 // The card obtained from the hot card cache could be in a young 345 // region. See above on how this can happen. 346 347 return res; 348 } 349 350 void ConcurrentG1Refine::clean_up_cache(int worker_i, 351 G1RemSet* g1rs, 352 DirtyCardQueue* into_cset_dcq) { 353 assert(!use_cache(), "cache should be disabled"); 354 int start_idx; 355 356 while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once 357 int end_idx = start_idx + _hot_cache_par_chunk_size; 358 359 if (start_idx == 360 Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { 361 // The current worker has successfully claimed the chunk [start_idx..end_idx) 362 end_idx = MIN2(end_idx, _n_hot); 363 for (int i = start_idx; i < end_idx; i++) { 364 jbyte* entry = _hot_cache[i]; 365 if (entry != NULL) { 366 if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) { 367 // 'entry' contains references that point into the current 368 // collection set. We need to record 'entry' in the DCQS 369 // that's used for that purpose. 370 // 371 // The only time we care about recording cards that contain 372 // references that point into the collection set is during 373 // RSet updating while within an evacuation pause. 374 // In this case worker_i should be the id of a GC worker thread 375 assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); 376 assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "incorrect worker id"); 377 into_cset_dcq->enqueue(entry); 378 } 379 } 380 } 381 } 382 } 383 } 384 385 void ConcurrentG1Refine::expand_card_count_cache() { 386 if (_n_card_counts < _max_n_card_counts) { 387 int new_idx = _cache_size_index+1; 388 int new_size = _cc_cache_sizes[new_idx]; 389 if (new_size < 0) new_size = _max_n_card_counts; 390 391 // Make sure we don't go bigger than we will ever need 392 new_size = MIN2((unsigned) new_size, _max_n_card_counts); 393 394 // Expand the card count and card epoch tables 395 if (new_size > (int)_n_card_counts) { 396 // We can just free and allocate a new array as we're 397 // not interested in preserving the contents 398 assert(_card_counts != NULL, "Logic!"); 399 assert(_card_epochs != NULL, "Logic!"); 400 FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts); 401 FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs); 402 _n_card_counts = new_size; 403 _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts); 404 _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts); 405 _cache_size_index = new_idx; 406 } 407 } 408 } 409 410 void ConcurrentG1Refine::clear_and_record_card_counts() { 411 if (G1ConcRSLogCacheSize == 0) return; 412 413 #ifndef PRODUCT 414 double start = os::elapsedTime(); 415 #endif 416 417 if (_expand_card_counts) { 418 expand_card_count_cache(); 419 _expand_card_counts = false; 420 // Only need to clear the epochs. 421 Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); 422 } 423 424 int this_epoch = (int) _n_periods; 425 assert((this_epoch+1) <= max_jint, "to many periods"); 426 // Update epoch 427 _n_periods++; 428 429 #ifndef PRODUCT 430 double elapsed = os::elapsedTime() - start; 431 _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0); 432 #endif 433 } 434 435 void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const { 436 for (int i = 0; i < _n_threads; ++i) { 437 _threads[i]->print_on(st); 438 st->cr(); 439 } 440 }