#ifdef USE_PRAGMA_IDENT_SRC
#pragma ident "@(#)concurrentMarkSweepGeneration.cpp 1.293 08/10/30 20:45:16 JVM"
#endif
/*
 * Copyright 2001-2008 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

# include "incls/_precompiled.incl"
# include "incls/_concurrentMarkSweepGeneration.cpp.incl"

// statics
CMSCollector* ConcurrentMarkSweepGeneration::_collector = NULL;
bool CMSCollector::_full_gc_requested = false;

//////////////////////////////////////////////////////////////////
// In support of CMS/VM thread synchronization
//////////////////////////////////////////////////////////////////
// We split use of the CGC_lock into 2 "levels".
// The low-level locking is of the usual CGC_lock monitor. We introduce
// a higher level "token" (hereafter "CMS token") built on top of the
// low level monitor (hereafter "CGC lock").
// The token-passing protocol gives priority to the VM thread. The
// CMS-lock doesn't provide any fairness guarantees, but clients
// should ensure that it is only held for very short, bounded
// durations.
//
// When either of the CMS thread or the VM thread is involved in
// collection operations during which it does not want the other
// thread to interfere, it obtains the CMS token.
//
// If either thread tries to get the token while the other has
// it, that thread waits. However, if the VM thread and CMS thread
// both want the token, then the VM thread gets priority while the
// CMS thread waits. This ensures, for instance, that the "concurrent"
// phases of the CMS thread's work do not block out the VM thread
// for long periods of time as the CMS thread continues to hog
// the token. (See bug 4616232).
//
// The baton-passing functions are, however, controlled by the
// flags _foregroundGCShouldWait and _foregroundGCIsActive,
// and here the low-level CMS lock, not the high level token,
// ensures mutual exclusion.
//
// Two important conditions that we have to satisfy:
// 1. if a thread does a low-level wait on the CMS lock, then it
//    relinquishes the CMS token if it were holding that token
//    when it acquired the low-level CMS lock.
// 2. any low-level notifications on the low-level lock
//    should only be sent when a thread has relinquished the token.
//
// In the absence of either property, we'd have potential deadlock.
//
// We protect each of the CMS (concurrent and sequential) phases
// with the CMS _token_, not the CMS _lock_.
//
// The only code protected by CMS lock is the token acquisition code
// itself, see ConcurrentMarkSweepThread::[de]synchronize(), and the
// baton-passing code.
//
// Unfortunately, I couldn't come up with a good abstraction to factor and
// hide the naked CGC_lock manipulation in the baton-passing code
// further below. That's something we should try to do. Also, the proof
// of correctness of this 2-level locking scheme is far from obvious,
// and potentially quite slippery. We have an uneasy suspicion, for instance,
// that there may be a theoretical possibility of delay/starvation in the
// low-level lock/wait/notify scheme used for the baton-passing because of
// potential interference with the priority scheme embodied in the
// CMS-token-passing protocol. See related comments at a CGC_lock->wait()
// invocation further below and marked with "XXX 20011219YSR".
// Indeed, as we note elsewhere, this may become yet more slippery
// in the presence of multiple CMS and/or multiple VM threads. XXX

class CMSTokenSync: public StackObj {
 private:
  bool _is_cms_thread;
 public:
  CMSTokenSync(bool is_cms_thread):
    _is_cms_thread(is_cms_thread) {
    assert(is_cms_thread == Thread::current()->is_ConcurrentGC_thread(),
           "Incorrect argument to constructor");
    ConcurrentMarkSweepThread::synchronize(_is_cms_thread);
  }

  ~CMSTokenSync() {
    assert(_is_cms_thread ?
             ConcurrentMarkSweepThread::cms_thread_has_cms_token() :
             ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
           "Incorrect state");
    ConcurrentMarkSweepThread::desynchronize(_is_cms_thread);
  }
};

// Convenience class that does a CMSTokenSync, and then acquires
// up to three locks.
class CMSTokenSyncWithLocks: public CMSTokenSync {
 private:
  // Note: locks are acquired in textual declaration order
  // and released in the opposite order
  MutexLockerEx _locker1, _locker2, _locker3;
 public:
  CMSTokenSyncWithLocks(bool is_cms_thread, Mutex* mutex1,
                        Mutex* mutex2 = NULL, Mutex* mutex3 = NULL):
    CMSTokenSync(is_cms_thread),
    _locker1(mutex1, Mutex::_no_safepoint_check_flag),
    _locker2(mutex2, Mutex::_no_safepoint_check_flag),
    _locker3(mutex3, Mutex::_no_safepoint_check_flag)
  { }
};


// Wrapper class to temporarily disable icms during a foreground cms collection.
class ICMSDisabler: public StackObj {
 public:
  // The ctor disables icms and wakes up the thread so it notices the change;
  // the dtor re-enables icms.  Note that the CMSCollector methods will check
  // CMSIncrementalMode.
  ICMSDisabler()  { CMSCollector::disable_icms(); CMSCollector::start_icms(); }
  ~ICMSDisabler() { CMSCollector::enable_icms(); }
};

//////////////////////////////////////////////////////////////////
//  Concurrent Mark-Sweep Generation /////////////////////////////
//////////////////////////////////////////////////////////////////

NOT_PRODUCT(CompactibleFreeListSpace* debug_cms_space;)

// This struct contains per-thread things necessary to support parallel
// young-gen collection.
class CMSParGCThreadState: public CHeapObj {
 public:
  CFLS_LAB lab;
  PromotionInfo promo;

  // Constructor.
  CMSParGCThreadState(CompactibleFreeListSpace* cfls) : lab(cfls) {
    promo.setSpace(cfls);
  }
};

ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
     ReservedSpace rs, size_t initial_byte_size, int level,
     CardTableRS* ct, bool use_adaptive_freelists,
     FreeBlockDictionary::DictionaryChoice dictionaryChoice) :
  CardGeneration(rs, initial_byte_size, level, ct),
  _dilatation_factor(((double)MinChunkSize)/((double)(oopDesc::header_size()))),
  _debug_collection_type(Concurrent_collection_type)
{
  HeapWord* bottom = (HeapWord*) _virtual_space.low();
  HeapWord* end    = (HeapWord*) _virtual_space.high();

  _direct_allocated_words = 0;
  NOT_PRODUCT(
    _numObjectsPromoted = 0;
    _numWordsPromoted = 0;
    _numObjectsAllocated = 0;
    _numWordsAllocated = 0;
  )

  _cmsSpace = new CompactibleFreeListSpace(_bts, MemRegion(bottom, end),
                                           use_adaptive_freelists,
                                           dictionaryChoice);
  NOT_PRODUCT(debug_cms_space = _cmsSpace;)
  if (_cmsSpace == NULL) {
    vm_exit_during_initialization(
      "CompactibleFreeListSpace allocation failure");
  }
  _cmsSpace->_gen = this;

  _gc_stats = new CMSGCStats();

  // Verify the assumption that FreeChunk::_prev and OopDesc::_klass
  // offsets match. The ability to tell free chunks from objects
  // depends on this property.
  debug_only(
    FreeChunk* junk = NULL;
    assert(junk->prev_addr() == (void*)(oop(junk)->klass_addr()),
           "Offset of FreeChunk::_prev within FreeChunk must match"
           " that of OopDesc::_klass within OopDesc");
  )
  if (ParallelGCThreads > 0) {
    typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
    _par_gc_thread_states =
      NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
    if (_par_gc_thread_states == NULL) {
      vm_exit_during_initialization("Could not allocate par gc structs");
    }
    for (uint i = 0; i < ParallelGCThreads; i++) {
      _par_gc_thread_states[i] = new CMSParGCThreadState(cmsSpace());
      if (_par_gc_thread_states[i] == NULL) {
        vm_exit_during_initialization("Could not allocate par gc structs");
      }
    }
  } else {
    _par_gc_thread_states = NULL;
  }
  _incremental_collection_failed = false;
  // The "dilatation_factor" is the expansion that can occur on
  // account of the fact that the minimum object size in the CMS
  // generation may be larger than that in, say, a contiguous young
  // generation.
  // Ideally, in the calculation below, we'd compute the dilatation
  // factor as: MinChunkSize/(promoting_gen's min object size)
  // Since we do not have such a general query interface for the
  // promoting generation, we'll instead just use the minimum
  // object size (which today is a header's worth of space);
  // note that all arithmetic is in units of HeapWords.
  assert(MinChunkSize >= oopDesc::header_size(), "just checking");
  assert(_dilatation_factor >= 1.0, "from previous assert");
}

void ConcurrentMarkSweepGeneration::ref_processor_init() {
  assert(collector() != NULL, "no collector");
  collector()->ref_processor_init();
}

void CMSCollector::ref_processor_init() {
  if (_ref_processor == NULL) {
    // Allocate and initialize a reference processor
    _ref_processor = ReferenceProcessor::create_ref_processor(
        _span,                               // span
        _cmsGen->refs_discovery_is_atomic(), // atomic_discovery
        _cmsGen->refs_discovery_is_mt(),     // mt_discovery
        &_is_alive_closure,
        ParallelGCThreads,
        ParallelRefProcEnabled);
    // Initialize the _ref_processor field of CMSGen
    _cmsGen->set_ref_processor(_ref_processor);

    // Allocate a dummy ref processor for perm gen.
    ReferenceProcessor* rp2 = new ReferenceProcessor();
    if (rp2 == NULL) {
      vm_exit_during_initialization("Could not allocate ReferenceProcessor object");
    }
    _permGen->set_ref_processor(rp2);
  }
}

CMSAdaptiveSizePolicy* CMSCollector::size_policy() {
  GenCollectedHeap* gch = GenCollectedHeap::heap();
  assert(gch->kind() == CollectedHeap::GenCollectedHeap,
         "Wrong type of heap");
  CMSAdaptiveSizePolicy* sp = (CMSAdaptiveSizePolicy*)
    gch->gen_policy()->size_policy();
  assert(sp->is_gc_cms_adaptive_size_policy(),
         "Wrong type of size policy");
  return sp;
}

CMSGCAdaptivePolicyCounters* CMSCollector::gc_adaptive_policy_counters() {
  CMSGCAdaptivePolicyCounters* results =
    (CMSGCAdaptivePolicyCounters*) collector_policy()->counters();
  assert(
    results->kind() == GCPolicyCounters::CMSGCAdaptivePolicyCountersKind,
    "Wrong gc policy counter kind");
  return results;
}


void ConcurrentMarkSweepGeneration::initialize_performance_counters() {

  const char* gen_name = "old";

  // Generation Counters - generation 1, 1 subspace
  _gen_counters = new GenerationCounters(gen_name, 1, 1, &_virtual_space);

  _space_counters = new GSpaceCounters(gen_name, 0,
                                       _virtual_space.reserved_size(),
                                       this, _gen_counters);
}

CMSStats::CMSStats(ConcurrentMarkSweepGeneration* cms_gen, unsigned int alpha):
  _cms_gen(cms_gen)
{
  assert(alpha <= 100, "bad value");
  _saved_alpha = alpha;

  // Initialize the alphas to the bootstrap value of 100.
  _gc0_alpha = _cms_alpha = 100;

  _cms_begin_time.update();
  _cms_end_time.update();

  _gc0_duration = 0.0;
  _gc0_period = 0.0;
  _gc0_promoted = 0;

  _cms_duration = 0.0;
  _cms_period = 0.0;
  _cms_allocated = 0;

  _cms_used_at_gc0_begin = 0;
  _cms_used_at_gc0_end = 0;
  _allow_duty_cycle_reduction = false;
  _valid_bits = 0;
  _icms_duty_cycle = CMSIncrementalDutyCycle;
}

// If promotion failure handling is on, use
// the padded average size of the promotion for each
// young generation collection.
double CMSStats::time_until_cms_gen_full() const {
  size_t cms_free = _cms_gen->cmsSpace()->free();
  GenCollectedHeap* gch = GenCollectedHeap::heap();
  size_t expected_promotion = gch->get_gen(0)->capacity();
  if (HandlePromotionFailure) {
    expected_promotion = MIN2(
        (size_t) _cms_gen->gc_stats()->avg_promoted()->padded_average(),
        expected_promotion);
  }
  if (cms_free > expected_promotion) {
    // Start a cms collection if there isn't enough space to promote
    // for the next minor collection.  Use the padded average as
    // a safety factor.
    cms_free -= expected_promotion;

    // Adjust by the safety factor.
    double cms_free_dbl = (double)cms_free;
    cms_free_dbl = cms_free_dbl * (100.0 - CMSIncrementalSafetyFactor) / 100.0;

    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr("CMSStats::time_until_cms_gen_full: cms_free "
        SIZE_FORMAT " expected_promotion " SIZE_FORMAT,
        cms_free, expected_promotion);
      gclog_or_tty->print_cr("  cms_free_dbl %f cms_consumption_rate %f",
        cms_free_dbl, cms_consumption_rate() + 1.0);
    }
    // Add 1 in case the consumption rate goes to zero.
    return cms_free_dbl / (cms_consumption_rate() + 1.0);
  }
  return 0.0;
}

// Compare the duration of the cms collection to the
// time remaining before the cms generation is empty.
// Note that the time from the start of the cms collection
// to the start of the cms sweep (less than the total
// duration of the cms collection) can be used.  This
// has been tried and some applications experienced
// promotion failures early in execution.  This was
// possibly because the averages were not accurate
// enough at the beginning.
double CMSStats::time_until_cms_start() const {
  // We add "gc0_period" to the "work" calculation
  // below because this query is done (mostly) at the
  // end of a scavenge, so we need to conservatively
  // account for that much possible delay
  // in the query so as to avoid concurrent mode failures
  // due to starting the collection just a wee bit too
  // late.
  double work = cms_duration() + gc0_period();
  double deadline = time_until_cms_gen_full();
  if (work > deadline) {
    if (Verbose && PrintGCDetails) {
      gclog_or_tty->print(
        " CMSCollector: collect because of anticipated promotion "
        "before full %3.7f + %3.7f > %3.7f ", cms_duration(),
        gc0_period(), time_until_cms_gen_full());
    }
    return 0.0;
  }
  return work - deadline;
}

// Return a duty cycle based on old_duty_cycle and new_duty_cycle, limiting the
// amount of change to prevent wild oscillation.
unsigned int CMSStats::icms_damped_duty_cycle(unsigned int old_duty_cycle,
                                              unsigned int new_duty_cycle) {
  assert(old_duty_cycle <= 100, "bad input value");
  assert(new_duty_cycle <= 100, "bad input value");

  // Note:  use subtraction with caution since it may underflow (values are
  // unsigned).  Addition is safe since we're in the range 0-100.
  unsigned int damped_duty_cycle = new_duty_cycle;
  if (new_duty_cycle < old_duty_cycle) {
    const unsigned int largest_delta = MAX2(old_duty_cycle / 4, 5U);
    if (new_duty_cycle + largest_delta < old_duty_cycle) {
      damped_duty_cycle = old_duty_cycle - largest_delta;
    }
  } else if (new_duty_cycle > old_duty_cycle) {
    const unsigned int largest_delta = MAX2(old_duty_cycle / 4, 15U);
    if (new_duty_cycle > old_duty_cycle + largest_delta) {
      damped_duty_cycle = MIN2(old_duty_cycle + largest_delta, 100U);
    }
  }
  assert(damped_duty_cycle <= 100, "invalid duty cycle computed");

  if (CMSTraceIncrementalPacing) {
    gclog_or_tty->print(" [icms_damped_duty_cycle(%d,%d) = %d] ",
                        old_duty_cycle, new_duty_cycle, damped_duty_cycle);
  }
  return damped_duty_cycle;
}

unsigned int CMSStats::icms_update_duty_cycle_impl() {
  assert(CMSIncrementalPacing && valid(),
         "should be handled in icms_update_duty_cycle()");

  double cms_time_so_far = cms_timer().seconds();
  double scaled_duration = cms_duration_per_mb() * _cms_used_at_gc0_end / M;
  double scaled_duration_remaining = fabsd(scaled_duration - cms_time_so_far);

  // Avoid division by 0.
  double time_until_full = MAX2(time_until_cms_gen_full(), 0.01);
  double duty_cycle_dbl = 100.0 * scaled_duration_remaining / time_until_full;

  unsigned int new_duty_cycle = MIN2((unsigned int)duty_cycle_dbl, 100U);
  if (new_duty_cycle > _icms_duty_cycle) {
    // Avoid very small duty cycles (1 or 2); 0 is allowed.
    if (new_duty_cycle > 2) {
      _icms_duty_cycle = icms_damped_duty_cycle(_icms_duty_cycle,
                                                new_duty_cycle);
    }
  } else if (_allow_duty_cycle_reduction) {
    // The duty cycle is reduced only once per cms cycle (see record_cms_end()).
    new_duty_cycle = icms_damped_duty_cycle(_icms_duty_cycle, new_duty_cycle);
    // Respect the minimum duty cycle.
    unsigned int min_duty_cycle = (unsigned int)CMSIncrementalDutyCycleMin;
    _icms_duty_cycle = MAX2(new_duty_cycle, min_duty_cycle);
  }

  if (PrintGCDetails || CMSTraceIncrementalPacing) {
    gclog_or_tty->print(" icms_dc=%d ", _icms_duty_cycle);
  }

  _allow_duty_cycle_reduction = false;
  return _icms_duty_cycle;
}

#ifndef PRODUCT
void CMSStats::print_on(outputStream *st) const {
  st->print(" gc0_alpha=%d,cms_alpha=%d", _gc0_alpha, _cms_alpha);
  st->print(",gc0_dur=%g,gc0_per=%g,gc0_promo=" SIZE_FORMAT,
            gc0_duration(), gc0_period(), gc0_promoted());
  st->print(",cms_dur=%g,cms_dur_per_mb=%g,cms_per=%g,cms_alloc=" SIZE_FORMAT,
            cms_duration(), cms_duration_per_mb(),
            cms_period(), cms_allocated());
  st->print(",cms_since_beg=%g,cms_since_end=%g",
            cms_time_since_begin(), cms_time_since_end());
  st->print(",cms_used_beg=" SIZE_FORMAT ",cms_used_end=" SIZE_FORMAT,
            _cms_used_at_gc0_begin, _cms_used_at_gc0_end);
  if (CMSIncrementalMode) {
    st->print(",dc=%d", icms_duty_cycle());
  }

  if (valid()) {
    st->print(",promo_rate=%g,cms_alloc_rate=%g",
              promotion_rate(), cms_allocation_rate());
    st->print(",cms_consumption_rate=%g,time_until_full=%g",
              cms_consumption_rate(), time_until_cms_gen_full());
  }
  st->print(" ");
}
#endif // #ifndef PRODUCT

CMSCollector::CollectorState CMSCollector::_collectorState =
                             CMSCollector::Idling;
bool CMSCollector::_foregroundGCIsActive = false;
bool CMSCollector::_foregroundGCShouldWait = false;

CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
                           ConcurrentMarkSweepGeneration* permGen,
                           CardTableRS*                   ct,
                           ConcurrentMarkSweepPolicy*     cp):
  _cmsGen(cmsGen),
  _permGen(permGen),
  _ct(ct),
  _ref_processor(NULL),    // will be set later
  _conc_workers(NULL),     // may be set later
  _abort_preclean(false),
  _start_sampling(false),
  _between_prologue_and_epilogue(false),
  _markBitMap(0, Mutex::leaf + 1, "CMS_markBitMap_lock"),
  _perm_gen_verify_bit_map(0, -1 /* no mutex */, "No_lock"),
  _modUnionTable((CardTableModRefBS::card_shift - LogHeapWordSize),
                 -1 /* lock-free */, "No_lock" /* dummy */),
  _modUnionClosure(&_modUnionTable),
  _modUnionClosurePar(&_modUnionTable),
  // Adjust my span to cover old (cms) gen and perm gen
  _span(cmsGen->reserved()._union(permGen->reserved())),
  // Construct the is_alive_closure with _span & markBitMap
  _is_alive_closure(_span, &_markBitMap),
  _restart_addr(NULL),
  _overflow_list(NULL),
  _preserved_oop_stack(NULL),
  _preserved_mark_stack(NULL),
  _stats(cmsGen),
  _eden_chunk_array(NULL),     // may be set in ctor body
  _eden_chunk_capacity(0),     // -- ditto --
  _eden_chunk_index(0),        // -- ditto --
  _survivor_plab_array(NULL),  // -- ditto --
  _survivor_chunk_array(NULL), // -- ditto --
  _survivor_chunk_capacity(0), // -- ditto --
  _survivor_chunk_index(0),    // -- ditto --
  _ser_pmc_preclean_ovflw(0),
  _ser_pmc_remark_ovflw(0),
  _par_pmc_remark_ovflw(0),
  _ser_kac_ovflw(0),
  _par_kac_ovflw(0),
#ifndef PRODUCT
  _num_par_pushes(0),
#endif
  _collection_count_start(0),
  _verifying(false),
  _icms_start_limit(NULL),
  _icms_stop_limit(NULL),
  _verification_mark_bm(0, Mutex::leaf + 1, "CMS_verification_mark_bm_lock"),
  _completed_initialization(false),
  _collector_policy(cp),
  _unload_classes(false),
  _unloaded_classes_last_cycle(false),
  _sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
{
  if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
    ExplicitGCInvokesConcurrent = true;
  }
  // Now expand the span and allocate the collection support structures
  // (MUT, marking bit map etc.) to cover both generations subject to
  // collection.

  // First check that _permGen is adjacent to _cmsGen and above it.
  assert(   _cmsGen->reserved().word_size()  > 0
         && _permGen->reserved().word_size() > 0,
         "generations should not be of zero size");
  assert(_cmsGen->reserved().intersection(_permGen->reserved()).is_empty(),
         "_cmsGen and _permGen should not overlap");
  assert(_cmsGen->reserved().end() == _permGen->reserved().start(),
         "_cmsGen->end() different from _permGen->start()");

  // For use by dirty card to oop closures.
  _cmsGen->cmsSpace()->set_collector(this);
  _permGen->cmsSpace()->set_collector(this);

  // Allocate MUT and marking bit map
  {
    MutexLockerEx x(_markBitMap.lock(), Mutex::_no_safepoint_check_flag);
    if (!_markBitMap.allocate(_span)) {
      warning("Failed to allocate CMS Bit Map");
      return;
    }
    assert(_markBitMap.covers(_span), "_markBitMap inconsistency?");
  }
  {
    _modUnionTable.allocate(_span);
    assert(_modUnionTable.covers(_span), "_modUnionTable inconsistency?");
  }

  if (!_markStack.allocate(CMSMarkStackSize)) {
    warning("Failed to allocate CMS Marking Stack");
    return;
  }
  if (!_revisitStack.allocate(CMSRevisitStackSize)) {
    warning("Failed to allocate CMS Revisit Stack");
    return;
  }

  // Support for multi-threaded concurrent phases
  if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) {
    if (FLAG_IS_DEFAULT(ParallelCMSThreads)) {
      // just for now
      FLAG_SET_DEFAULT(ParallelCMSThreads, (ParallelGCThreads + 3)/4);
    }
    if (ParallelCMSThreads > 1) {
      _conc_workers = new YieldingFlexibleWorkGang("Parallel CMS Threads",
                                                   ParallelCMSThreads, true);
      if (_conc_workers == NULL) {
        warning("GC/CMS: _conc_workers allocation failure: "
                "forcing -CMSConcurrentMTEnabled");
        CMSConcurrentMTEnabled = false;
      }
    } else {
      CMSConcurrentMTEnabled = false;
    }
  }
  if (!CMSConcurrentMTEnabled) {
    ParallelCMSThreads = 0;
  } else {
    // Turn off CMSCleanOnEnter optimization temporarily for
    // the MT case where it's not fixed yet; see 6178663.
    CMSCleanOnEnter = false;
  }
  assert((_conc_workers != NULL) == (ParallelCMSThreads > 1),
         "Inconsistency");

  // Parallel task queues; these are shared for the
  // concurrent and stop-world phases of CMS, but
  // are not shared with parallel scavenge (ParNew).
  {
    uint i;
    uint num_queues = (uint) MAX2(ParallelGCThreads, ParallelCMSThreads);

    if ((CMSParallelRemarkEnabled || CMSConcurrentMTEnabled
         || ParallelRefProcEnabled)
        && num_queues > 0) {
      _task_queues = new OopTaskQueueSet(num_queues);
      if (_task_queues == NULL) {
        warning("task_queues allocation failure.");
        return;
      }
      _hash_seed = NEW_C_HEAP_ARRAY(int, num_queues);
      if (_hash_seed == NULL) {
        warning("_hash_seed array allocation failure");
        return;
      }

      // XXX use a global constant instead of 64!
      typedef struct OopTaskQueuePadded {
        OopTaskQueue work_queue;
        char pad[64 - sizeof(OopTaskQueue)];  // prevent false sharing
      } OopTaskQueuePadded;

      for (i = 0; i < num_queues; i++) {
        OopTaskQueuePadded *q_padded = new OopTaskQueuePadded();
        if (q_padded == NULL) {
          warning("work_queue allocation failure.");
          return;
        }
        _task_queues->register_queue(i, &q_padded->work_queue);
      }
      for (i = 0; i < num_queues; i++) {
        _task_queues->queue(i)->initialize();
        _hash_seed[i] = 17;  // copied from ParNew
      }
    }
  }

  // "initiatingOccupancy" is the occupancy ratio at which we trigger
  // a new collection cycle.  Unless explicitly specified via
  // CMSTriggerRatio, it is calculated by:
  //   Let "f" be MinHeapFreeRatio in
  //
  //    initiatingOccupancy = 100-f +
  //                           f * (CMSTriggerRatio/100)
  // That is, if we assume the heap is at its desired maximum occupancy at the
  // end of a collection, we let CMSTriggerRatio of the (purported) free
  // space be allocated before initiating a new collection cycle.
  if (CMSInitiatingOccupancyFraction > 0) {
    _initiatingOccupancy = (double)CMSInitiatingOccupancyFraction / 100.0;
  } else {
    _initiatingOccupancy = ((100 - MinHeapFreeRatio) +
                            (double)(CMSTriggerRatio *
                                     MinHeapFreeRatio) / 100.0)
                           / 100.0;
  }
  // Clip CMSBootstrapOccupancy between 0 and 100.
  _bootstrap_occupancy = ((double)MIN2((intx)100, MAX2((intx)0, CMSBootstrapOccupancy)))
                         /(double)100;

  _full_gcs_since_conc_gc = 0;

  // Now tell CMS generations the identity of their collector
  ConcurrentMarkSweepGeneration::set_collector(this);

  // Create & start a CMS thread for this CMS collector
  _cmsThread = ConcurrentMarkSweepThread::start(this);
  assert(cmsThread() != NULL, "CMS Thread should have been created");
  assert(cmsThread()->collector() == this,
         "CMS Thread should refer to this gen");
  assert(CGC_lock != NULL, "Where's the CGC_lock?");

  // Support for parallelizing young gen rescan
  GenCollectedHeap* gch = GenCollectedHeap::heap();
  _young_gen = gch->prev_gen(_cmsGen);
  if (gch->supports_inline_contig_alloc()) {
    _top_addr = gch->top_addr();
    _end_addr = gch->end_addr();
    assert(_young_gen != NULL, "no _young_gen");
    _eden_chunk_index = 0;
    _eden_chunk_capacity = (_young_gen->max_capacity()+CMSSamplingGrain)/CMSSamplingGrain;
    _eden_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, _eden_chunk_capacity);
    if (_eden_chunk_array == NULL) {
      _eden_chunk_capacity = 0;
      warning("GC/CMS: _eden_chunk_array allocation failure");
    }
  }
  assert(_eden_chunk_array != NULL || _eden_chunk_capacity == 0, "Error");

  // Support for parallelizing survivor space rescan
  if (CMSParallelRemarkEnabled && CMSParallelSurvivorRemarkEnabled) {
    size_t max_plab_samples = MaxNewSize/((SurvivorRatio+2)*MinTLABSize);
    _survivor_plab_array  = NEW_C_HEAP_ARRAY(ChunkArray, ParallelGCThreads);
    _survivor_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, 2*max_plab_samples);
    _cursor = NEW_C_HEAP_ARRAY(size_t, ParallelGCThreads);
    if (_survivor_plab_array == NULL || _survivor_chunk_array == NULL
        || _cursor == NULL) {
      warning("Failed to allocate survivor plab/chunk array");
      if (_survivor_plab_array  != NULL) {
        FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array);
        _survivor_plab_array = NULL;
      }
      if (_survivor_chunk_array != NULL) {
        FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array);
        _survivor_chunk_array = NULL;
      }
      if (_cursor != NULL) {
        FREE_C_HEAP_ARRAY(size_t, _cursor);
        _cursor = NULL;
      }
    } else {
      _survivor_chunk_capacity = 2*max_plab_samples;
      for (uint i = 0; i < ParallelGCThreads; i++) {
        HeapWord** vec = NEW_C_HEAP_ARRAY(HeapWord*, max_plab_samples);
        if (vec == NULL) {
          warning("Failed to allocate survivor plab array");
          for (int j = i; j > 0; j--) {
            FREE_C_HEAP_ARRAY(HeapWord*, _survivor_plab_array[j-1].array());
          }
          FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array);
          FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array);
          _survivor_plab_array = NULL;
          _survivor_chunk_array = NULL;
          _survivor_chunk_capacity = 0;
          break;
        } else {
          ChunkArray* cur =
            ::new (&_survivor_plab_array[i]) ChunkArray(vec,
                                                        max_plab_samples);
          assert(cur->end() == 0, "Should be 0");
          assert(cur->array() == vec, "Should be vec");
          assert(cur->capacity() == max_plab_samples, "Error");
        }
      }
    }
  }
  assert(   (   _survivor_plab_array  != NULL
             && _survivor_chunk_array != NULL)
         || (   _survivor_chunk_capacity == 0
             && _survivor_chunk_index == 0),
         "Error");

  // Choose what strong roots should be scanned depending on verification options
  // and perm gen collection mode.
  if (!CMSClassUnloadingEnabled) {
    // If class unloading is disabled we want to include all classes into the root set.
    add_root_scanning_option(SharedHeap::SO_AllClasses);
  } else {
    add_root_scanning_option(SharedHeap::SO_SystemClasses);
  }

  NOT_PRODUCT(_overflow_counter = CMSMarkStackOverflowInterval;)
  _gc_counters = new CollectorCounters("CMS", 1);
  _completed_initialization = true;
  _sweep_timer.start();  // start of time
}

const char* ConcurrentMarkSweepGeneration::name() const {
  return "concurrent mark-sweep generation";
}
void ConcurrentMarkSweepGeneration::update_counters() {
  if (UsePerfData) {
    _space_counters->update_all();
    _gen_counters->update_all();
  }
}

// this is an optimized version of update_counters(). it takes the
// used value as a parameter rather than computing it.
//
void ConcurrentMarkSweepGeneration::update_counters(size_t used) {
  if (UsePerfData) {
    _space_counters->update_used(used);
    _space_counters->update_capacity();
    _gen_counters->update_all();
  }
}

void ConcurrentMarkSweepGeneration::print() const {
  Generation::print();
  cmsSpace()->print();
}

#ifndef PRODUCT
void ConcurrentMarkSweepGeneration::print_statistics() {
  cmsSpace()->printFLCensus(0);
}
#endif

void ConcurrentMarkSweepGeneration::printOccupancy(const char *s) {
  GenCollectedHeap* gch = GenCollectedHeap::heap();
  if (PrintGCDetails) {
    if (Verbose) {
      gclog_or_tty->print(" [%d %s-%s: "SIZE_FORMAT"("SIZE_FORMAT")]",
        level(), short_name(), s, used(), capacity());
    } else {
      gclog_or_tty->print(" [%d %s-%s: "SIZE_FORMAT"K("SIZE_FORMAT"K)]",
        level(), short_name(), s, used() / K, capacity() / K);
    }
  }
  if (Verbose) {
    gclog_or_tty->print(" "SIZE_FORMAT"("SIZE_FORMAT")",
      gch->used(), gch->capacity());
  } else {
    gclog_or_tty->print(" "SIZE_FORMAT"K("SIZE_FORMAT"K)",
      gch->used() / K, gch->capacity() / K);
  }
}

size_t
ConcurrentMarkSweepGeneration::contiguous_available() const {
  // dld proposes an improvement in precision here.
  // If the committed
  // part of the space ends in a free block we should add that to
  // uncommitted size in the calculation below. Will make this
  // change later, staying with the approximation below for the
  // time being. -- ysr.
  return MAX2(_virtual_space.uncommitted_size(), unsafe_max_alloc_nogc());
}

size_t
ConcurrentMarkSweepGeneration::unsafe_max_alloc_nogc() const {
  return _cmsSpace->max_alloc_in_words() * HeapWordSize;
}

size_t ConcurrentMarkSweepGeneration::max_available() const {
  return free() + _virtual_space.uncommitted_size();
}

bool ConcurrentMarkSweepGeneration::promotion_attempt_is_safe(
    size_t max_promotion_in_bytes,
    bool younger_handles_promotion_failure) const {

  // This is the most conservative test.  Full promotion is
  // guaranteed if this is used. The multiplicative factor is to
  // account for the worst case "dilatation".
  double adjusted_max_promo_bytes = _dilatation_factor * max_promotion_in_bytes;
  if (adjusted_max_promo_bytes > (double)max_uintx) { // larger than size_t
    adjusted_max_promo_bytes = (double)max_uintx;
  }
  bool result = (max_contiguous_available() >= (size_t)adjusted_max_promo_bytes);

  if (younger_handles_promotion_failure && !result) {
    // Full promotion is not guaranteed because fragmentation
    // of the cms generation can prevent the full promotion.
    result = (max_available() >= (size_t)adjusted_max_promo_bytes);

    if (!result) {
      // With promotion failure handling the test for the ability
      // to support the promotion does not have to be guaranteed.
      // Use an average of the amount promoted.
      result = max_available() >= (size_t)
        gc_stats()->avg_promoted()->padded_average();
      if (PrintGC && Verbose && result) {
        gclog_or_tty->print_cr(
          "\nConcurrentMarkSweepGeneration::promotion_attempt_is_safe"
          " max_available: " SIZE_FORMAT
          " avg_promoted: " SIZE_FORMAT,
          max_available(), (size_t)
          gc_stats()->avg_promoted()->padded_average());
      }
    } else {
      if (PrintGC && Verbose) {
        gclog_or_tty->print_cr(
          "\nConcurrentMarkSweepGeneration::promotion_attempt_is_safe"
          " max_available: " SIZE_FORMAT
          " adj_max_promo_bytes: " SIZE_FORMAT,
          max_available(), (size_t)adjusted_max_promo_bytes);
      }
    }
  } else {
    if (PrintGC && Verbose) {
      gclog_or_tty->print_cr(
        "\nConcurrentMarkSweepGeneration::promotion_attempt_is_safe"
        " contiguous_available: " SIZE_FORMAT
        " adj_max_promo_bytes: " SIZE_FORMAT,
        max_contiguous_available(), (size_t)adjusted_max_promo_bytes);
    }
  }
  return result;
}

CompactibleSpace*
ConcurrentMarkSweepGeneration::first_compaction_space() const {
  return _cmsSpace;
}

void ConcurrentMarkSweepGeneration::reset_after_compaction() {
  // Clear the promotion information.  These pointers can be adjusted
  // along with all the other pointers into the heap but
  // compaction is expected to be a rare event with
  // a heap using cms so don't do it without seeing the need.
  if (ParallelGCThreads > 0) {
    for (uint i = 0; i < ParallelGCThreads; i++) {
      _par_gc_thread_states[i]->promo.reset();
    }
  }
}

void ConcurrentMarkSweepGeneration::space_iterate(SpaceClosure* blk, bool usedOnly) {
  blk->do_space(_cmsSpace);
}

void ConcurrentMarkSweepGeneration::compute_new_size() {
  assert_locked_or_safepoint(Heap_lock);

  // If incremental collection failed, we just want to expand
  // to the limit.
  if (incremental_collection_failed()) {
    clear_incremental_collection_failed();
    grow_to_reserved();
    return;
  }

  size_t expand_bytes = 0;
  double free_percentage = ((double) free()) / capacity();
  double desired_free_percentage = (double) MinHeapFreeRatio / 100;
  double maximum_free_percentage = (double) MaxHeapFreeRatio / 100;

  // compute expansion delta needed for reaching desired free percentage
  if (free_percentage < desired_free_percentage) {
    size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
    assert(desired_capacity >= capacity(), "invalid expansion size");
    expand_bytes = MAX2(desired_capacity - capacity(), MinHeapDeltaBytes);
  }
  if (expand_bytes > 0) {
    if (PrintGCDetails && Verbose) {
      size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
      gclog_or_tty->print_cr("\nFrom compute_new_size: ");
      gclog_or_tty->print_cr("  Free fraction %f", free_percentage);
      gclog_or_tty->print_cr("  Desired free fraction %f",
        desired_free_percentage);
      gclog_or_tty->print_cr("  Maximum free fraction %f",
        maximum_free_percentage);
      gclog_or_tty->print_cr("  Capacity "SIZE_FORMAT, capacity()/1000);
      gclog_or_tty->print_cr("  Desired capacity "SIZE_FORMAT,
        desired_capacity/1000);
      int prev_level = level() - 1;
      if (prev_level >= 0) {
        size_t prev_size = 0;
        GenCollectedHeap* gch = GenCollectedHeap::heap();
        Generation* prev_gen = gch->_gens[prev_level];
        prev_size = prev_gen->capacity();
        gclog_or_tty->print_cr("  Younger gen size "SIZE_FORMAT,
          prev_size/1000);
      }
      gclog_or_tty->print_cr("  unsafe_max_alloc_nogc "SIZE_FORMAT,
        unsafe_max_alloc_nogc()/1000);
      gclog_or_tty->print_cr("  contiguous available "SIZE_FORMAT,
        contiguous_available()/1000);
      gclog_or_tty->print_cr("  Expand by "SIZE_FORMAT" (bytes)",
        expand_bytes);
    }
    // safe if expansion fails
    expand(expand_bytes, 0, CMSExpansionCause::_satisfy_free_ratio);
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr("  Expanded free fraction %f",
        ((double) free()) / capacity());
    }
  }
}

Mutex* ConcurrentMarkSweepGeneration::freelistLock() const {
  return cmsSpace()->freelistLock();
}

HeapWord* ConcurrentMarkSweepGeneration::allocate(size_t size,
                                                  bool   tlab) {
  CMSSynchronousYieldRequest yr;
  MutexLockerEx x(freelistLock(),
                  Mutex::_no_safepoint_check_flag);
  return have_lock_and_allocate(size, tlab);
}

HeapWord* ConcurrentMarkSweepGeneration::have_lock_and_allocate(size_t size,
                                                                bool   tlab) {
  assert_lock_strong(freelistLock());
  size_t adjustedSize = CompactibleFreeListSpace::adjustObjectSize(size);
  HeapWord* res = cmsSpace()->allocate(adjustedSize);
  // Allocate the object live (grey) if the background collector has
  // started marking.
  // This is necessary because the marker may
  // have passed this address and consequently this object will
  // not otherwise be greyed and would be incorrectly swept up.
  // Note that if this object contains references, the writing
  // of those references will dirty the card containing this object
  // allowing the object to be blackened (and its references scanned)
  // either during a preclean phase or at the final checkpoint.
  if (res != NULL) {
    collector()->direct_allocated(res, adjustedSize);
    _direct_allocated_words += adjustedSize;
    // allocation counters
    NOT_PRODUCT(
      _numObjectsAllocated++;
      _numWordsAllocated += (int)adjustedSize;
    )
  }
  return res;
}

// In the case of direct allocation by mutators in a generation that
// is being concurrently collected, the object must be allocated
// live (grey) if the background collector has started marking.
// This is necessary because the marker may
// have passed this address and consequently this object will
// not otherwise be greyed and would be incorrectly swept up.
// Note that if this object contains references, the writing
// of those references will dirty the card containing this object
// allowing the object to be blackened (and its references scanned)
// either during a preclean phase or at the final checkpoint.
void CMSCollector::direct_allocated(HeapWord* start, size_t size) {
  assert(_markBitMap.covers(start, size), "Out of bounds");
  if (_collectorState >= Marking) {
    MutexLockerEx y(_markBitMap.lock(),
                    Mutex::_no_safepoint_check_flag);
    // [see comments preceding SweepClosure::do_blk() below for details]
    // 1. need to mark the object as live so it isn't collected
    // 2. need to mark the 2nd bit to indicate the object may be uninitialized
    // 3. need to mark the end of the object so sweeper can skip over it
    //    if it's uninitialized when the sweeper reaches it.
    _markBitMap.mark(start);          // object is live
    _markBitMap.mark(start + 1);      // object is potentially uninitialized?
    _markBitMap.mark(start + size - 1);
                                      // mark end of object
  }
  // check that oop looks uninitialized
  assert(oop(start)->klass() == NULL, "_klass should be NULL");
}

void CMSCollector::promoted(bool par, HeapWord* start,
                            bool is_obj_array, size_t obj_size) {
  assert(_markBitMap.covers(start), "Out of bounds");
  // See comment in direct_allocated() about when objects should
  // be allocated live.
  if (_collectorState >= Marking) {
    // we already hold the marking bit map lock, taken in
    // the prologue
    if (par) {
      _markBitMap.par_mark(start);
    } else {
      _markBitMap.mark(start);
    }
    // We don't need to mark the object as uninitialized (as
    // in direct_allocated above) because this is being done with the
    // world stopped and the object will be initialized by the
    // time the sweeper gets to look at it.
    assert(SafepointSynchronize::is_at_safepoint(),
           "expect promotion only at safepoints");

    if (_collectorState < Sweeping) {
      // Mark the appropriate cards in the modUnionTable, so that
      // this object gets scanned before the sweep. If this is
      // not done, CMS generation references in the object might
      // not get marked.
      // For the case of arrays, which are otherwise precisely
      // marked, we need to dirty the entire array, not just its head.
      if (is_obj_array) {
        // The [par_]mark_range() method expects mr.end() below to
        // be aligned to the granularity of a bit's representation
        // in the heap. In the case of the MUT below, that's a
        // card size.
        MemRegion mr(start,
                     (HeapWord*)round_to((intptr_t)(start + obj_size),
                        CardTableModRefBS::card_size /* bytes */));
        if (par) {
          _modUnionTable.par_mark_range(mr);
        } else {
          _modUnionTable.mark_range(mr);
        }
      } else {  // not an obj array; we can just mark the head
        if (par) {
          _modUnionTable.par_mark(start);
        } else {
          _modUnionTable.mark(start);
        }
      }
    }
  }
}

static inline size_t percent_of_space(Space* space, HeapWord* addr)
{
  size_t delta = pointer_delta(addr, space->bottom());
  return (size_t)(delta * 100.0 / (space->capacity() / HeapWordSize));
}

void CMSCollector::icms_update_allocation_limits()
{
  Generation* gen0 = GenCollectedHeap::heap()->get_gen(0);
  EdenSpace* eden = gen0->as_DefNewGeneration()->eden();

  const unsigned int duty_cycle = stats().icms_update_duty_cycle();
  if (CMSTraceIncrementalPacing) {
    stats().print();
  }

  assert(duty_cycle <= 100, "invalid duty cycle");
  if (duty_cycle != 0) {
    // The duty_cycle is a percentage between 0 and 100; convert to words and
    // then compute the offset from the endpoints of the space.
    size_t free_words = eden->free() / HeapWordSize;
    double free_words_dbl = (double)free_words;
    size_t duty_cycle_words = (size_t)(free_words_dbl * duty_cycle / 100.0);
    size_t offset_words = (free_words - duty_cycle_words) / 2;

    _icms_start_limit = eden->top() + offset_words;
    _icms_stop_limit = eden->end() - offset_words;

    // The limits may be adjusted (shifted to the right) by
    // CMSIncrementalOffset, to allow the application more mutator time after a
    // young gen gc (when all mutators were stopped) and before CMS starts and
    // takes away one or more cpus.
    if (CMSIncrementalOffset != 0) {
      double adjustment_dbl = free_words_dbl * CMSIncrementalOffset / 100.0;
      size_t adjustment = (size_t)adjustment_dbl;
      HeapWord* tmp_stop = _icms_stop_limit + adjustment;
      if (tmp_stop > _icms_stop_limit && tmp_stop < eden->end()) {
        _icms_start_limit += adjustment;
        _icms_stop_limit = tmp_stop;
      }
    }
  }
  if (duty_cycle == 0 || (_icms_start_limit == _icms_stop_limit)) {
    _icms_start_limit = _icms_stop_limit = eden->end();
  }

  // Install the new start limit.
  eden->set_soft_end(_icms_start_limit);

  if (CMSTraceIncrementalMode) {
    gclog_or_tty->print(" icms alloc limits:  "
                        PTR_FORMAT "," PTR_FORMAT
                        " (" SIZE_FORMAT "%%," SIZE_FORMAT "%%) ",
                        _icms_start_limit, _icms_stop_limit,
                        percent_of_space(eden, _icms_start_limit),
                        percent_of_space(eden, _icms_stop_limit));
    if (Verbose) {
      gclog_or_tty->print("eden:  ");
      eden->print_on(gclog_or_tty);
    }
  }
}

// Any changes here should try to maintain the invariant
// that if this method is called with _icms_start_limit
// and _icms_stop_limit both NULL, then it should return NULL
// and not notify the icms thread.
HeapWord*
CMSCollector::allocation_limit_reached(Space* space, HeapWord* top,
                                       size_t word_size)
{
  // A start_limit equal to end() means the duty cycle is 0, so treat that as a
  // nop.
  if (CMSIncrementalMode && _icms_start_limit != space->end()) {
    if (top <= _icms_start_limit) {
      if (CMSTraceIncrementalMode) {
        space->print_on(gclog_or_tty);
        gclog_or_tty->stamp();
        gclog_or_tty->print_cr(" start limit top=" PTR_FORMAT
                               ", new limit=" PTR_FORMAT
                               " (" SIZE_FORMAT "%%)",
                               top, _icms_stop_limit,
                               percent_of_space(space, _icms_stop_limit));
      }
      ConcurrentMarkSweepThread::start_icms();
      assert(top < _icms_stop_limit, "Tautology");
      if (word_size < pointer_delta(_icms_stop_limit, top)) {
        return _icms_stop_limit;
      }

      // The allocation will cross both the _start and _stop limits, so do the
      // stop notification also and return end().
      if (CMSTraceIncrementalMode) {
        space->print_on(gclog_or_tty);
        gclog_or_tty->stamp();
        gclog_or_tty->print_cr(" +stop limit top=" PTR_FORMAT
                               ", new limit=" PTR_FORMAT
                               " (" SIZE_FORMAT "%%)",
                               top, space->end(),
                               percent_of_space(space, space->end()));
      }
      ConcurrentMarkSweepThread::stop_icms();
      return space->end();
    }

    if (top <= _icms_stop_limit) {
      if (CMSTraceIncrementalMode) {
        space->print_on(gclog_or_tty);
        gclog_or_tty->stamp();
        gclog_or_tty->print_cr(" stop limit top=" PTR_FORMAT
                               ", new limit=" PTR_FORMAT
                               " (" SIZE_FORMAT "%%)",
                               top, space->end(),
                               percent_of_space(space, space->end()));
      }
      ConcurrentMarkSweepThread::stop_icms();
      return space->end();
    }

    if (CMSTraceIncrementalMode) {
      space->print_on(gclog_or_tty);
      gclog_or_tty->stamp();
      gclog_or_tty->print_cr(" end limit top=" PTR_FORMAT
                             ", new limit=" PTR_FORMAT,
                             top, NULL);
    }
  }

  return NULL;
}

oop ConcurrentMarkSweepGeneration::promote(oop obj, size_t obj_size, oop* ref) {
  assert(obj_size == (size_t)obj->size(), "bad obj_size passed in");
  // allocate, copy and if necessary update promoinfo --
  // delegate to underlying space.
  assert_lock_strong(freelistLock());

#ifndef PRODUCT
  if (Universe::heap()->promotion_should_fail()) {
    return NULL;
  }
#endif  // #ifndef PRODUCT

  oop res = _cmsSpace->promote(obj, obj_size, ref);
  if (res == NULL) {
    // expand and retry
    size_t s = _cmsSpace->expansionSpaceRequired(obj_size);  // HeapWords
    expand(s*HeapWordSize, MinHeapDeltaBytes,
      CMSExpansionCause::_satisfy_promotion);
    // Since there's currently no next generation, we don't try to promote
    // into a more senior generation.
    assert(next_gen() == NULL, "assumption, based upon which no attempt "
                               "is made to pass on a possibly failing "
                               "promotion to next generation");
    res = _cmsSpace->promote(obj, obj_size, ref);
  }
  if (res != NULL) {
    // See comment in allocate() about when objects should
    // be allocated live.
    assert(obj->is_oop(), "Will dereference klass pointer below");
    collector()->promoted(false,           // Not parallel
                          (HeapWord*)res, obj->is_objArray(), obj_size);
    // promotion counters
    NOT_PRODUCT(
      _numObjectsPromoted++;
      _numWordsPromoted +=
        (int)(CompactibleFreeListSpace::adjustObjectSize(obj->size()));
    )
  }
  return res;
}


HeapWord*
ConcurrentMarkSweepGeneration::allocation_limit_reached(Space* space,
                                                        HeapWord* top,
                                                        size_t word_sz)
{
  return collector()->allocation_limit_reached(space, top, word_sz);
}

// Things to support parallel young-gen collection.
oop
ConcurrentMarkSweepGeneration::par_promote(int thread_num,
                                           oop old, markOop m,
                                           size_t word_sz) {
#ifndef PRODUCT
  if (Universe::heap()->promotion_should_fail()) {
    return NULL;
  }
#endif  // #ifndef PRODUCT

  CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
  PromotionInfo* promoInfo = &ps->promo;
  // if we are tracking promotions, then first ensure space for
  // promotion (including spooling space for saving header if necessary).
  // then allocate and copy, then track promoted info if needed.
  // When tracking (see PromotionInfo::track()), the mark word may
  // be displaced and in this case restoration of the mark word
  // occurs in the (oop_since_save_marks_)iterate phase.
  if (promoInfo->tracking() && !promoInfo->ensure_spooling_space()) {
    // Out of space for allocating spooling buffers;
    // try expanding and allocating spooling buffers.
    if (!expand_and_ensure_spooling_space(promoInfo)) {
      return NULL;
    }
  }
  assert(promoInfo->has_spooling_space(), "Control point invariant");
  HeapWord* obj_ptr = ps->lab.alloc(word_sz);
  if (obj_ptr == NULL) {
    obj_ptr = expand_and_par_lab_allocate(ps, word_sz);
    if (obj_ptr == NULL) {
      return NULL;
    }
  }
  oop obj = oop(obj_ptr);
  assert(obj->klass() == NULL, "Object should be uninitialized here.");
  // Otherwise, copy the object.  Here we must be careful to insert the
  // klass pointer last, since this marks the block as an allocated object.
  HeapWord* old_ptr = (HeapWord*)old;
  if (word_sz > (size_t)oopDesc::header_size()) {
    Copy::aligned_disjoint_words(old_ptr + oopDesc::header_size(),
                                 obj_ptr + oopDesc::header_size(),
                                 word_sz - oopDesc::header_size());
  }
  // Restore the mark word copied above.
  obj->set_mark(m);
  // Now we can track the promoted object, if necessary.  We take care
  // to delay the transition from uninitialized to full object
  // (i.e., insertion of klass pointer) until after, so that it
  // atomically becomes a promoted object.
  if (promoInfo->tracking()) {
    promoInfo->track((PromotedObject*)obj, old->klass());
  }
  // Finally, install the klass pointer.
  obj->set_klass(old->klass());

  assert(old->is_oop(), "Will dereference klass ptr below");
  collector()->promoted(true,          // parallel
                        obj_ptr, old->is_objArray(), word_sz);

  NOT_PRODUCT(
    Atomic::inc(&_numObjectsPromoted);
    Atomic::add((jint)CompactibleFreeListSpace::adjustObjectSize(obj->size()),
                &_numWordsPromoted);
  )

  return obj;
}

void
ConcurrentMarkSweepGeneration::
par_promote_alloc_undo(int thread_num,
                       HeapWord* obj, size_t word_sz) {
  // CMS does not support promotion undo.
  ShouldNotReachHere();
}

void
ConcurrentMarkSweepGeneration::
par_promote_alloc_done(int thread_num) {
  CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
  ps->lab.retire();
#if CFLS_LAB_REFILL_STATS
  if (thread_num == 0) {
    _cmsSpace->print_par_alloc_stats();
  }
#endif
}

void
ConcurrentMarkSweepGeneration::
par_oop_since_save_marks_iterate_done(int thread_num) {
  CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
  ParScanWithoutBarrierClosure* dummy_cl = NULL;
  ps->promo.promoted_oops_iterate_nv(dummy_cl);
}

// XXXPERM
bool ConcurrentMarkSweepGeneration::should_collect(bool   full,
                                                   size_t size,
                                                   bool   tlab)
{
  // We allow a STW collection only if a full
  // collection was requested.
  return full || should_allocate(size, tlab); // FIX ME !!!
  // This and promotion failure handling are connected at the
  // hip and should be fixed by untying them.
}

bool CMSCollector::shouldConcurrentCollect() {
  if (_full_gc_requested) {
    assert(ExplicitGCInvokesConcurrent, "Unexpected state");
    if (Verbose && PrintGCDetails) {
      gclog_or_tty->print_cr("CMSCollector: collect because of explicit "
                             " gc request");
    }
    return true;
  }

  // For debugging purposes, change the type of collection.
  // If the rotation is not on the concurrent collection
  // type, don't start a concurrent collection.
  NOT_PRODUCT(
    if (RotateCMSCollectionTypes &&
        (_cmsGen->debug_collection_type() !=
         ConcurrentMarkSweepGeneration::Concurrent_collection_type)) {
      assert(_cmsGen->debug_collection_type() !=
        ConcurrentMarkSweepGeneration::Unknown_collection_type,
        "Bad cms collection type");
      return false;
    }
  )

  FreelistLocker x(this);
  // ------------------------------------------------------------------
  // Print out lots of information which affects the initiation of
  // a collection.
  if (PrintCMSInitiationStatistics && stats().valid()) {
    gclog_or_tty->print("CMSCollector shouldConcurrentCollect: ");
    gclog_or_tty->stamp();
    gclog_or_tty->print_cr("");
    stats().print_on(gclog_or_tty);
    gclog_or_tty->print_cr("time_until_cms_gen_full %3.7f",
      stats().time_until_cms_gen_full());
    gclog_or_tty->print_cr("free="SIZE_FORMAT, _cmsGen->free());
    gclog_or_tty->print_cr("contiguous_available="SIZE_FORMAT,
      _cmsGen->contiguous_available());
    gclog_or_tty->print_cr("promotion_rate=%g", stats().promotion_rate());
    gclog_or_tty->print_cr("cms_allocation_rate=%g", stats().cms_allocation_rate());
    gclog_or_tty->print_cr("occupancy=%3.7f", _cmsGen->occupancy());
    gclog_or_tty->print_cr("initiatingOccupancy=%3.7f", initiatingOccupancy());
  }
  // ------------------------------------------------------------------

  // If the estimated time to complete a cms collection (cms_duration())
  // is less than the estimated time remaining until the cms generation
  // is full, start a collection.
  if (!UseCMSInitiatingOccupancyOnly) {
    if (stats().valid()) {
      if (stats().time_until_cms_start() == 0.0) {
        return true;
      }
    } else {
      // We want to conservatively collect somewhat early in order
      // to try and "bootstrap" our CMS/promotion statistics;
      // this branch will not fire after the first successful CMS
      // collection because the stats should then be valid.
      if (_cmsGen->occupancy() >= _bootstrap_occupancy) {
        if (Verbose && PrintGCDetails) {
          gclog_or_tty->print_cr(
            " CMSCollector: collect for bootstrapping statistics:"
            " occupancy = %f, boot occupancy = %f", _cmsGen->occupancy(),
            _bootstrap_occupancy);
        }
        return true;
      }
    }
  }

  // Otherwise, we start a collection cycle if either the perm gen or
  // old gen want a collection cycle started. Each may use
  // an appropriate criterion for making this decision.
  // XXX We need to make sure that the gen expansion
  // criterion dovetails well with this.
  if (_cmsGen->shouldConcurrentCollect(initiatingOccupancy())) {
    if (Verbose && PrintGCDetails) {
      gclog_or_tty->print_cr("CMS old gen initiated");
    }
    return true;
  }

  if (cms_should_unload_classes() &&
      _permGen->shouldConcurrentCollect(initiatingOccupancy())) {
    if (Verbose && PrintGCDetails) {
      gclog_or_tty->print_cr("CMS perm gen initiated");
    }
    return true;
  }

  return false;
}

// Clear _expansion_cause fields of constituent generations
void CMSCollector::clear_expansion_cause() {
  _cmsGen->clear_expansion_cause();
  _permGen->clear_expansion_cause();
}

bool ConcurrentMarkSweepGeneration::shouldConcurrentCollect(
  double initiatingOccupancy) {
  // We should be conservative in starting a collection cycle.  To
  // start too eagerly runs the risk of collecting too often in the
  // extreme.  To collect too rarely falls back on full collections,
  // which works, even if not optimum in terms of concurrent work.
  // As a workaround for too eagerly collecting, use the flag
  // UseCMSInitiatingOccupancyOnly.  This also has the advantage of
  // giving the user an easily understandable way of controlling the
  // collections.
  // We want to start a new collection cycle if any of the following
  // conditions hold:
  // . our current occupancy exceeds the initiating occupancy, or
  // . we recently needed to expand and have not since that expansion,
  //   collected, or
  // . we are not using adaptive free lists and linear allocation is
  //   going to fail, or
  // . (for old gen) incremental collection has already failed or
  //   may soon fail in the near future as we may not be able to absorb
  //   promotions.
1495 assert_lock_strong(freelistLock()); 1496 1497 if (occupancy() > initiatingOccupancy) { 1498 if (PrintGCDetails && Verbose) { 1499 gclog_or_tty->print(" %s: collect because of occupancy %f / %f ", 1500 short_name(), occupancy(), initiatingOccupancy); 1501 } 1502 return true; 1503 } 1504 if (UseCMSInitiatingOccupancyOnly) { 1505 return false; 1506 } 1507 if (expansion_cause() == CMSExpansionCause::_satisfy_allocation) { 1508 if (PrintGCDetails && Verbose) { 1509 gclog_or_tty->print(" %s: collect because expanded for allocation ", 1510 short_name()); 1511 } 1512 return true; 1513 } 1514 GenCollectedHeap* gch = GenCollectedHeap::heap(); 1515 assert(gch->collector_policy()->is_two_generation_policy(), 1516 "You may want to check the correctness of the following"); 1517 if (gch->incremental_collection_will_fail()) { 1518 if (PrintGCDetails && Verbose) { 1519 gclog_or_tty->print(" %s: collect because incremental collection will fail ", 1520 short_name()); 1521 } 1522 return true; 1523 } 1524 if (!_cmsSpace->adaptive_freelists() && 1525 _cmsSpace->linearAllocationWouldFail()) { 1526 if (PrintGCDetails && Verbose) { 1527 gclog_or_tty->print(" %s: collect because of linAB ", 1528 short_name()); 1529 } 1530 return true; 1531 } 1532 return false; 1533 } 1534 1535 void ConcurrentMarkSweepGeneration::collect(bool full, 1536 bool clear_all_soft_refs, 1537 size_t size, 1538 bool tlab) 1539 { 1540 collector()->collect(full, clear_all_soft_refs, size, tlab); 1541 } 1542 1543 void CMSCollector::collect(bool full, 1544 bool clear_all_soft_refs, 1545 size_t size, 1546 bool tlab) 1547 { 1548 if (!UseCMSCollectionPassing && _collectorState > Idling) { 1549 // For debugging purposes skip the collection if the state 1550 // is not currently idle 1551 if (TraceCMSState) { 1552 gclog_or_tty->print_cr("Thread " INTPTR_FORMAT " skipped full:%d CMS state %d", 1553 Thread::current(), full, _collectorState); 1554 } 1555 return; 1556 } 1557 1558 // The following "if" branch is present for defensive reasons. 1559 // In the current uses of this interface, it can be replaced with: 1560 // assert(!GC_locker.is_active(), "Can't be called otherwise"); 1561 // But I am not placing that assert here to allow future 1562 // generality in invoking this interface. 1563 if (GC_locker::is_active()) { 1564 // A consistency test for GC_locker 1565 assert(GC_locker::needs_gc(), "Should have been set already"); 1566 // Skip this foreground collection, instead 1567 // expanding the heap if necessary. 1568 // Need the free list locks for the call to free() in compute_new_size() 1569 compute_new_size(); 1570 return; 1571 } 1572 acquire_control_and_collect(full, clear_all_soft_refs); 1573 _full_gcs_since_conc_gc++; 1574 1575 } 1576 1577 void CMSCollector::request_full_gc(unsigned int full_gc_count) { 1578 GenCollectedHeap* gch = GenCollectedHeap::heap(); 1579 unsigned int gc_count = gch->total_full_collections(); 1580 if (gc_count == full_gc_count) { 1581 MutexLockerEx y(CGC_lock, Mutex::_no_safepoint_check_flag); 1582 _full_gc_requested = true; 1583 CGC_lock->notify(); // nudge CMS thread 1584 } 1585 } 1586 1587 1588 // The foreground and background collectors need to coordinate in order 1589 // to make sure that they do not mutually interfere with CMS collections. 1590 // When a background collection is active, 1591 // the foreground collector may need to take over (preempt) and 1592 // synchronously complete an ongoing collection. 
Depending on the
1593 // frequency of the background collections and the heap usage
1594 // of the application, this preemption may be rare or frequent.
1595 // There are only certain
1596 // points in the background collection at which the "collection-baton"
1597 // can be passed to the foreground collector.
1598 //
1599 // The foreground collector will wait for the baton before
1600 // starting any part of the collection. The foreground collector
1601 // will only wait at one location.
1602 //
1603 // The background collector will yield the baton before starting a new
1604 // phase of the collection (e.g., before initial marking, marking from roots,
1605 // precleaning, final re-mark, sweep etc.) This is normally done at the head
1606 // of the loop which switches the phases. The background collector does some
1607 // of the phases (initial mark, final re-mark) with the world stopped.
1608 // Because of locking involved in stopping the world,
1609 // the foreground collector should not block waiting for the background
1610 // collector when it is doing a stop-the-world phase. The background
1611 // collector will yield the baton at an additional point just before
1612 // it enters a stop-the-world phase. Once the world is stopped, the
1613 // background collector checks the phase of the collection. If the
1614 // phase has not changed, it proceeds with the collection. If the
1615 // phase has changed, it skips that phase of the collection. See
1616 // the comments on the use of the Heap_lock in collect_in_background().
1617 //
1618 // Variables used in baton passing.
1619 // _foregroundGCIsActive - Set to true by the foreground collector when
1620 // it wants the baton. The foreground clears it when it has finished
1621 // the collection.
1622 // _foregroundGCShouldWait - Set to true by the background collector
1623 // when it is running. The foreground collector waits while
1624 // _foregroundGCShouldWait is true.
1625 // CGC_lock - monitor used to protect access to the above variables
1626 // and to notify the foreground and background collectors.
1627 // _collectorState - current state of the CMS collection.
1628 //
1629 // The foreground collector
1630 // acquires the CGC_lock
1631 // sets _foregroundGCIsActive
1632 // waits on the CGC_lock for _foregroundGCShouldWait to be false
1633 // various locks acquired in preparation for the collection
1634 // are released so as not to block the background collector
1635 // that is in the midst of a collection
1636 // proceeds with the collection
1637 // clears _foregroundGCIsActive
1638 // returns
1639 //
1640 // The background collector in a loop iterating on the phases of the
1641 // collection
1642 // acquires the CGC_lock
1643 // sets _foregroundGCShouldWait
1644 // if _foregroundGCIsActive is set
1645 // clears _foregroundGCShouldWait, notifies _CGC_lock
1646 // waits on _CGC_lock for _foregroundGCIsActive to become false
1647 // and exits the loop.
1648 // otherwise
1649 // proceed with that phase of the collection
1650 // if the phase is a stop-the-world phase,
1651 // yield the baton once more just before enqueueing
1652 // the stop-world CMS operation (executed by the VM thread).
1653 // returns after all phases of the collection are done
1654 //
1655
1656 void CMSCollector::acquire_control_and_collect(bool full,
1657 bool clear_all_soft_refs) {
1658 assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
1659 assert(!Thread::current()->is_ConcurrentGC_thread(),
1660 "shouldn't try to acquire control from self!");
1661
1662 // Start the protocol for acquiring control of the
1663 // collection from the background collector (aka CMS thread).
1664 assert(ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
1665 "VM thread should have CMS token");
1666 // Remember the possibly interrupted state of an ongoing
1667 // concurrent collection
1668 CollectorState first_state = _collectorState;
1669
1670 // Signal to a possibly ongoing concurrent collection that
1671 // we want to do a foreground collection.
1672 _foregroundGCIsActive = true;
1673
1674 // Disable incremental mode during a foreground collection.
1675 ICMSDisabler icms_disabler;
1676
1677 // release locks and wait for a notify from the background collector
1678 // releasing the locks is only necessary for phases which
1679 // yield, to improve the granularity of the collection.
1680 assert_lock_strong(bitMapLock());
1681 // We need to lock the Free list lock for the space that we are
1682 // currently collecting.
1683 assert(haveFreelistLocks(), "Must be holding free list locks");
1684 bitMapLock()->unlock();
1685 releaseFreelistLocks();
1686 {
1687 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
1688 if (_foregroundGCShouldWait) {
1689 // We are going to be waiting for action from the CMS thread;
1690 // it had better not be gone (for instance at shutdown)!
1691 assert(ConcurrentMarkSweepThread::cmst() != NULL,
1692 "CMS thread must be running");
1693 // Wait here until the background collector gives us the go-ahead
1694 ConcurrentMarkSweepThread::clear_CMS_flag(
1695 ConcurrentMarkSweepThread::CMS_vm_has_token); // release token
1696 // Get a possibly blocked CMS thread going:
1697 // Note that we set _foregroundGCIsActive true above,
1698 // without protection of the CGC_lock.
1699 CGC_lock->notify();
1700 assert(!ConcurrentMarkSweepThread::vm_thread_wants_cms_token(),
1701 "Possible deadlock");
1702 while (_foregroundGCShouldWait) {
1703 // wait for notification
1704 CGC_lock->wait(Mutex::_no_safepoint_check_flag);
1705 // Possibility of delay/starvation here, since CMS token does
1706 // not know to give priority to VM thread? Actually, I think
1707 // there wouldn't be any delay/starvation, but the proof of
1708 // that "fact" (?) appears non-trivial. XXX 20011219YSR
1709 }
1710 ConcurrentMarkSweepThread::set_CMS_flag(
1711 ConcurrentMarkSweepThread::CMS_vm_has_token);
1712 }
1713 }
1714 // The CMS_token is already held. Get back the other locks.
1715 assert(ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
1716 "VM thread should have CMS token");
1717 getFreelistLocks();
1718 bitMapLock()->lock_without_safepoint_check();
1719 if (TraceCMSState) {
1720 gclog_or_tty->print_cr("CMS foreground collector has asked for control "
1721 INTPTR_FORMAT " with first state %d", Thread::current(), first_state);
1722 gclog_or_tty->print_cr(" gets control with state %d", _collectorState);
1723 }
1724
1725 // Check if we need to do a compaction, or if not, whether
1726 // we need to start the mark-sweep from scratch.
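// Aside: a minimal, self-contained sketch of the baton-passing handshake
// described in the block comment preceding this function. It is illustrative
// only (hence the #if 0), collapses the real protocol onto a single monitor
// plus the two flags, ignores the CMS-token layer, and uses made-up names
// (foreground_collect, background_cycle, baton_lock, a MonitorLocker stand-in
// for lock/wait/notify, etc.):
#if 0
// Shared state, protected by the monitor "baton_lock":
//   bool fg_is_active;    // plays the role of _foregroundGCIsActive
//   bool fg_should_wait;  // plays the role of _foregroundGCShouldWait

void foreground_collect() {
  { MonitorLocker ml(baton_lock);
    fg_is_active = true;                     // ask for the baton
    while (fg_should_wait) ml.wait();        // background yields it at a phase boundary
  }
  do_foreground_collection();                // world is stopped; locks reacquired as needed
  { MonitorLocker ml(baton_lock);
    fg_is_active = false;                    // hand the baton back
    ml.notify_all();
  }
}

void background_cycle() {
  for (int phase = 0; phase < num_phases; phase++) {
    { MonitorLocker ml(baton_lock);
      fg_should_wait = true;
      if (fg_is_active) {                    // foreground wants the baton:
        fg_should_wait = false;              //   yield it,
        ml.notify_all();                     //   wake the foreground collector,
        while (fg_is_active) ml.wait();      //   wait for it to finish,
        return;                              //   and abandon this background cycle
      }
    }
    do_phase(phase);                         // concurrent work, done off the monitor
  }
  { MonitorLocker ml(baton_lock);
    fg_should_wait = false;                  // cycle complete; release any waiter
    if (fg_is_active) ml.notify_all();
  }
}
#endif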
1727 bool should_compact = false;
1728 bool should_start_over = false;
1729 decide_foreground_collection_type(clear_all_soft_refs,
1730 &should_compact, &should_start_over);
1731
1732 NOT_PRODUCT(
1733 if (RotateCMSCollectionTypes) {
1734 if (_cmsGen->debug_collection_type() ==
1735 ConcurrentMarkSweepGeneration::MSC_foreground_collection_type) {
1736 should_compact = true;
1737 } else if (_cmsGen->debug_collection_type() ==
1738 ConcurrentMarkSweepGeneration::MS_foreground_collection_type) {
1739 should_compact = false;
1740 }
1741 }
1742 )
1743
1744 if (PrintGCDetails && first_state > Idling) {
1745 GCCause::Cause cause = GenCollectedHeap::heap()->gc_cause();
1746 if (GCCause::is_user_requested_gc(cause) ||
1747 GCCause::is_serviceability_requested_gc(cause)) {
1748 gclog_or_tty->print(" (concurrent mode interrupted)");
1749 } else {
1750 gclog_or_tty->print(" (concurrent mode failure)");
1751 }
1752 }
1753
1754 if (should_compact) {
1755 // If the collection is being acquired from the background
1756 // collector, there may be references on the discovered
1757 // references lists that have NULL referents (being those
1758 // that were concurrently cleared by a mutator) or
1759 // that are no longer active (having been enqueued concurrently
1760 // by the mutator).
1761 // Scrub the list of those references because Mark-Sweep-Compact
1762 // code assumes referents are not NULL and that all discovered
1763 // Reference objects are active.
1764 ref_processor()->clean_up_discovered_references();
1765
1766 do_compaction_work(clear_all_soft_refs);
1767
1768 // Has the GC time limit been exceeded?
1769 check_gc_time_limit();
1770
1771 } else {
1772 do_mark_sweep_work(clear_all_soft_refs, first_state,
1773 should_start_over);
1774 }
1775 // Reset the expansion cause, now that we just completed
1776 // a collection cycle.
1777 clear_expansion_cause();
1778 _foregroundGCIsActive = false;
1779 return;
1780 }
1781
1782 void CMSCollector::check_gc_time_limit() {
1783
1784 // Ignore explicit GC's. Exiting here does not set the flag and
1785 // does not reset the count. Updating of the averages for system
1786 // GC's is still controlled by UseAdaptiveSizePolicyWithSystemGC.
1787 GCCause::Cause gc_cause = GenCollectedHeap::heap()->gc_cause();
1788 if (GCCause::is_user_requested_gc(gc_cause) ||
1789 GCCause::is_serviceability_requested_gc(gc_cause)) {
1790 return;
1791 }
1792
1793 // Calculate the fraction of the CMS generation that was freed during
1794 // the last collection.
1795 // Only consider the STW compacting cost for now.
1796 //
1797 // Note that the gc time limit test only works for the collections
1798 // of the young gen + tenured gen and not for collections of the
1799 // permanent gen. That is because the calculation of the space
1800 // freed by the collection is the free space in the young gen +
1801 // tenured gen.
1802
1803 double fraction_free =
1804 ((double)_cmsGen->free())/((double)_cmsGen->max_capacity());
1805 if ((100.0 * size_policy()->compacting_gc_cost()) >
1806 ((double) GCTimeLimit) &&
1807 ((fraction_free * 100) < GCHeapFreeLimit)) {
1808 size_policy()->inc_gc_time_limit_count();
1809 if (UseGCOverheadLimit &&
1810 (size_policy()->gc_time_limit_count() >
1811 AdaptiveSizePolicyGCTimeLimitThreshold)) {
1812 size_policy()->set_gc_time_limit_exceeded(true);
1813 // Avoid consecutive OOM due to the gc time limit by resetting
1814 // the counter.
1815 size_policy()->reset_gc_time_limit_count(); 1816 if (PrintGCDetails) { 1817 gclog_or_tty->print_cr(" GC is exceeding overhead limit " 1818 "of %d%%", GCTimeLimit); 1819 } 1820 } else { 1821 if (PrintGCDetails) { 1822 gclog_or_tty->print_cr(" GC would exceed overhead limit " 1823 "of %d%%", GCTimeLimit); 1824 } 1825 } 1826 } else { 1827 size_policy()->reset_gc_time_limit_count(); 1828 } 1829 } 1830 1831 // Resize the perm generation and the tenured generation 1832 // after obtaining the free list locks for the 1833 // two generations. 1834 void CMSCollector::compute_new_size() { 1835 assert_locked_or_safepoint(Heap_lock); 1836 FreelistLocker z(this); 1837 _permGen->compute_new_size(); 1838 _cmsGen->compute_new_size(); 1839 } 1840 1841 // A work method used by foreground collection to determine 1842 // what type of collection (compacting or not, continuing or fresh) 1843 // it should do. 1844 // NOTE: the intent is to make UseCMSCompactAtFullCollection 1845 // and CMSCompactWhenClearAllSoftRefs the default in the future 1846 // and do away with the flags after a suitable period. 1847 void CMSCollector::decide_foreground_collection_type( 1848 bool clear_all_soft_refs, bool* should_compact, 1849 bool* should_start_over) { 1850 // Normally, we'll compact only if the UseCMSCompactAtFullCollection 1851 // flag is set, and we have either requested a System.gc() or 1852 // the number of full gc's since the last concurrent cycle 1853 // has exceeded the threshold set by CMSFullGCsBeforeCompaction, 1854 // or if an incremental collection has failed 1855 GenCollectedHeap* gch = GenCollectedHeap::heap(); 1856 assert(gch->collector_policy()->is_two_generation_policy(), 1857 "You may want to check the correctness of the following"); 1858 // Inform cms gen if this was due to partial collection failing. 1859 // The CMS gen may use this fact to determine its expansion policy. 1860 if (gch->incremental_collection_will_fail()) { 1861 assert(!_cmsGen->incremental_collection_failed(), 1862 "Should have been noticed, reacted to and cleared"); 1863 _cmsGen->set_incremental_collection_failed(); 1864 } 1865 *should_compact = 1866 UseCMSCompactAtFullCollection && 1867 ((_full_gcs_since_conc_gc >= CMSFullGCsBeforeCompaction) || 1868 GCCause::is_user_requested_gc(gch->gc_cause()) || 1869 gch->incremental_collection_will_fail()); 1870 *should_start_over = false; 1871 if (clear_all_soft_refs && !*should_compact) { 1872 // We are about to do a last ditch collection attempt 1873 // so it would normally make sense to do a compaction 1874 // to reclaim as much space as possible. 1875 if (CMSCompactWhenClearAllSoftRefs) { 1876 // Default: The rationale is that in this case either 1877 // we are past the final marking phase, in which case 1878 // we'd have to start over, or so little has been done 1879 // that there's little point in saving that work. Compaction 1880 // appears to be the sensible choice in either case. 1881 *should_compact = true; 1882 } else { 1883 // We have been asked to clear all soft refs, but not to 1884 // compact. Make sure that we aren't past the final checkpoint 1885 // phase, for that is where we process soft refs. If we are already 1886 // past that phase, we'll need to redo the refs discovery phase and 1887 // if necessary clear soft refs that weren't previously 1888 // cleared. We do so by remembering the phase in which 1889 // we came in, and if we are past the refs processing 1890 // phase, we'll choose to just redo the mark-sweep 1891 // collection from scratch. 
1892 if (_collectorState > FinalMarking) { 1893 // We are past the refs processing phase; 1894 // start over and do a fresh synchronous CMS cycle 1895 _collectorState = Resetting; // skip to reset to start new cycle 1896 reset(false /* == !asynch */); 1897 *should_start_over = true; 1898 } // else we can continue a possibly ongoing current cycle 1899 } 1900 } 1901 } 1902 1903 // A work method used by the foreground collector to do 1904 // a mark-sweep-compact. 1905 void CMSCollector::do_compaction_work(bool clear_all_soft_refs) { 1906 GenCollectedHeap* gch = GenCollectedHeap::heap(); 1907 TraceTime t("CMS:MSC ", PrintGCDetails && Verbose, true, gclog_or_tty); 1908 if (PrintGC && Verbose && !(GCCause::is_user_requested_gc(gch->gc_cause()))) { 1909 gclog_or_tty->print_cr("Compact ConcurrentMarkSweepGeneration after %d " 1910 "collections passed to foreground collector", _full_gcs_since_conc_gc); 1911 } 1912 1913 // Sample collection interval time and reset for collection pause. 1914 if (UseAdaptiveSizePolicy) { 1915 size_policy()->msc_collection_begin(); 1916 } 1917 1918 // Temporarily widen the span of the weak reference processing to 1919 // the entire heap. 1920 MemRegion new_span(GenCollectedHeap::heap()->reserved_region()); 1921 ReferenceProcessorSpanMutator x(ref_processor(), new_span); 1922 1923 // Temporarily, clear the "is_alive_non_header" field of the 1924 // reference processor. 1925 ReferenceProcessorIsAliveMutator y(ref_processor(), NULL); 1926 1927 // Temporarily make reference _processing_ single threaded (non-MT). 1928 ReferenceProcessorMTProcMutator z(ref_processor(), false); 1929 1930 // Temporarily make refs discovery atomic 1931 ReferenceProcessorAtomicMutator w(ref_processor(), true); 1932 1933 ref_processor()->set_enqueuing_is_done(false); 1934 ref_processor()->enable_discovery(); 1935 // If an asynchronous collection finishes, the _modUnionTable is 1936 // all clear. If we are assuming the collection from an asynchronous 1937 // collection, clear the _modUnionTable. 1938 assert(_collectorState != Idling || _modUnionTable.isAllClear(), 1939 "_modUnionTable should be clear if the baton was not passed"); 1940 _modUnionTable.clear_all(); 1941 1942 // We must adjust the allocation statistics being maintained 1943 // in the free list space. We do so by reading and clearing 1944 // the sweep timer and updating the block flux rate estimates below. 1945 assert(_sweep_timer.is_active(), "We should never see the timer inactive"); 1946 _sweep_timer.stop(); 1947 // Note that we do not use this sample to update the _sweep_estimate. 
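// (Presumably because this interval ends at a foreground compaction rather
//  than at the end of a normal concurrent sweep, so it would not be a
//  representative sample for the inter-sweep duration estimate.)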
1948 _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()),
1949 _sweep_estimate.padded_average());
1950
1951 GenMarkSweep::invoke_at_safepoint(_cmsGen->level(),
1952 ref_processor(), clear_all_soft_refs);
1953 #ifdef ASSERT
1954 CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
1955 size_t free_size = cms_space->free();
1956 assert(free_size ==
1957 pointer_delta(cms_space->end(), cms_space->compaction_top())
1958 * HeapWordSize,
1959 "All the free space should be compacted into one chunk at top");
1960 assert(cms_space->dictionary()->totalChunkSize(
1961 debug_only(cms_space->freelistLock())) == 0 ||
1962 cms_space->totalSizeInIndexedFreeLists() == 0,
1963 "All the free space should be in a single chunk");
1964 size_t num = cms_space->totalCount();
1965 assert((free_size == 0 && num == 0) ||
1966 (free_size > 0 && (num == 1 || num == 2)),
1967 "There should be at most 2 free chunks after compaction");
1968 #endif // ASSERT
1969 _collectorState = Resetting;
1970 assert(_restart_addr == NULL,
1971 "Should have been NULL'd before baton was passed");
1972 reset(false /* == !asynch */);
1973 _cmsGen->reset_after_compaction();
1974
1975 if (verifying() && !cms_should_unload_classes()) {
1976 perm_gen_verify_bit_map()->clear_all();
1977 }
1978
1979 // Clear any data recorded in the PLAB chunk arrays.
1980 if (_survivor_plab_array != NULL) {
1981 reset_survivor_plab_arrays();
1982 }
1983
1984 // Adjust the per-size allocation stats for the next epoch.
1985 _cmsGen->cmsSpace()->endSweepFLCensus(sweepCount() /* fake */);
1986 // Restart the "sweep timer" for next epoch.
1987 _sweep_timer.reset();
1988 _sweep_timer.start();
1989
1990 // Sample collection pause time and reset for collection interval.
1991 if (UseAdaptiveSizePolicy) {
1992 size_policy()->msc_collection_end(gch->gc_cause());
1993 }
1994
1995 // For a mark-sweep-compact, compute_new_size() will be called
1996 // in the heap's do_collection() method.
1997 }
1998
1999 // A work method used by the foreground collector to do
2000 // a mark-sweep, after taking over from a possibly on-going
2001 // concurrent mark-sweep collection.
2002 void CMSCollector::do_mark_sweep_work(bool clear_all_soft_refs,
2003 CollectorState first_state, bool should_start_over) {
2004 if (PrintGC && Verbose) {
2005 gclog_or_tty->print_cr("Pass concurrent collection to foreground "
2006 "collector with count %d",
2007 _full_gcs_since_conc_gc);
2008 }
2009 switch (_collectorState) {
2010 case Idling:
2011 if (first_state == Idling || should_start_over) {
2012 // The background GC was not active, or should be
2013 // restarted from scratch; start the cycle.
2014 _collectorState = InitialMarking;
2015 }
2016 // If first_state was not Idling, then a background GC
2017 // was in progress and has now finished. No need to do it
2018 // again. Leave the state as Idling.
2019 break;
2020 case Precleaning:
2021 // In the foreground case don't do the precleaning since
2022 // it is not done concurrently and there is extra work
2023 // required.
2024 _collectorState = FinalMarking;
2025 }
2026 if (PrintGCDetails &&
2027 (_collectorState > Idling ||
2028 !GCCause::is_user_requested_gc(GenCollectedHeap::heap()->gc_cause()))) {
2029 gclog_or_tty->print(" (concurrent mode failure)");
2030 }
2031 collect_in_foreground(clear_all_soft_refs);
2032
2033 // For a mark-sweep, compute_new_size() will be called
2034 // in the heap's do_collection() method.
2035 } 2036 2037 2038 void CMSCollector::getFreelistLocks() const { 2039 // Get locks for all free lists in all generations that this 2040 // collector is responsible for 2041 _cmsGen->freelistLock()->lock_without_safepoint_check(); 2042 _permGen->freelistLock()->lock_without_safepoint_check(); 2043 } 2044 2045 void CMSCollector::releaseFreelistLocks() const { 2046 // Release locks for all free lists in all generations that this 2047 // collector is responsible for 2048 _cmsGen->freelistLock()->unlock(); 2049 _permGen->freelistLock()->unlock(); 2050 } 2051 2052 bool CMSCollector::haveFreelistLocks() const { 2053 // Check locks for all free lists in all generations that this 2054 // collector is responsible for 2055 assert_lock_strong(_cmsGen->freelistLock()); 2056 assert_lock_strong(_permGen->freelistLock()); 2057 PRODUCT_ONLY(ShouldNotReachHere()); 2058 return true; 2059 } 2060 2061 // A utility class that is used by the CMS collector to 2062 // temporarily "release" the foreground collector from its 2063 // usual obligation to wait for the background collector to 2064 // complete an ongoing phase before proceeding. 2065 class ReleaseForegroundGC: public StackObj { 2066 private: 2067 CMSCollector* _c; 2068 public: 2069 ReleaseForegroundGC(CMSCollector* c) : _c(c) { 2070 assert(_c->_foregroundGCShouldWait, "Else should not need to call"); 2071 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); 2072 // allow a potentially blocked foreground collector to proceed 2073 _c->_foregroundGCShouldWait = false; 2074 if (_c->_foregroundGCIsActive) { 2075 CGC_lock->notify(); 2076 } 2077 assert(!ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 2078 "Possible deadlock"); 2079 } 2080 2081 ~ReleaseForegroundGC() { 2082 assert(!_c->_foregroundGCShouldWait, "Usage protocol violation?"); 2083 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); 2084 _c->_foregroundGCShouldWait = true; 2085 } 2086 }; 2087 2088 // There are separate collect_in_background and collect_in_foreground because of 2089 // the different locking requirements of the background collector and the 2090 // foreground collector. There was originally an attempt to share 2091 // one "collect" method between the background collector and the foreground 2092 // collector but the if-then-else required made it cleaner to have 2093 // separate methods. 2094 void CMSCollector::collect_in_background(bool clear_all_soft_refs) { 2095 assert(Thread::current()->is_ConcurrentGC_thread(), 2096 "A CMS asynchronous collection is only allowed on a CMS thread."); 2097 2098 GenCollectedHeap* gch = GenCollectedHeap::heap(); 2099 { 2100 bool safepoint_check = Mutex::_no_safepoint_check_flag; 2101 MutexLockerEx hl(Heap_lock, safepoint_check); 2102 MutexLockerEx x(CGC_lock, safepoint_check); 2103 if (_foregroundGCIsActive || !UseAsyncConcMarkSweepGC) { 2104 // The foreground collector is active or we're 2105 // not using asynchronous collections. Skip this 2106 // background collection. 2107 assert(!_foregroundGCShouldWait, "Should be clear"); 2108 return; 2109 } else { 2110 assert(_collectorState == Idling, "Should be idling before start."); 2111 _collectorState = InitialMarking; 2112 // Reset the expansion cause, now that we are about to begin 2113 // a new cycle. 2114 clear_expansion_cause(); 2115 } 2116 _unloaded_classes_last_cycle = cms_should_unload_classes(); // ... from last cycle 2117 // This controls class unloading in response to an explicit gc request. 
2118 // If ExplicitGCInvokesConcurrentAndUnloadsClasses is set, then 2119 // we will unload classes even if CMSClassUnloadingEnabled is not set. 2120 // See CR 6541037 and related CRs. 2121 _unload_classes = _full_gc_requested // ... for this cycle 2122 && ExplicitGCInvokesConcurrentAndUnloadsClasses; 2123 _full_gc_requested = false; // acks all outstanding full gc requests 2124 // Signal that we are about to start a collection 2125 gch->increment_total_full_collections(); // ... starting a collection cycle 2126 _collection_count_start = gch->total_full_collections(); 2127 } 2128 2129 // Used for PrintGC 2130 size_t prev_used; 2131 if (PrintGC && Verbose) { 2132 prev_used = _cmsGen->used(); // XXXPERM 2133 } 2134 2135 // The change of the collection state is normally done at this level; 2136 // the exceptions are phases that are executed while the world is 2137 // stopped. For those phases the change of state is done while the 2138 // world is stopped. For baton passing purposes this allows the 2139 // background collector to finish the phase and change state atomically. 2140 // The foreground collector cannot wait on a phase that is done 2141 // while the world is stopped because the foreground collector already 2142 // has the world stopped and would deadlock. 2143 while (_collectorState != Idling) { 2144 if (TraceCMSState) { 2145 gclog_or_tty->print_cr("Thread " INTPTR_FORMAT " in CMS state %d", 2146 Thread::current(), _collectorState); 2147 } 2148 // The foreground collector 2149 // holds the Heap_lock throughout its collection. 2150 // holds the CMS token (but not the lock) 2151 // except while it is waiting for the background collector to yield. 2152 // 2153 // The foreground collector should be blocked (not for long) 2154 // if the background collector is about to start a phase 2155 // executed with world stopped. If the background 2156 // collector has already started such a phase, the 2157 // foreground collector is blocked waiting for the 2158 // Heap_lock. The stop-world phases (InitialMarking and FinalMarking) 2159 // are executed in the VM thread. 2160 // 2161 // The locking order is 2162 // PendingListLock (PLL) -- if applicable (FinalMarking) 2163 // Heap_lock (both this & PLL locked in VM_CMS_Operation::prologue()) 2164 // CMS token (claimed in 2165 // stop_world_and_do() --> 2166 // safepoint_synchronize() --> 2167 // CMSThread::synchronize()) 2168 2169 { 2170 // Check if the FG collector wants us to yield. 2171 CMSTokenSync x(true); // is cms thread 2172 if (waitForForegroundGC()) { 2173 // We yielded to a foreground GC, nothing more to be 2174 // done this round. 2175 assert(_foregroundGCShouldWait == false, "We set it to false in " 2176 "waitForForegroundGC()"); 2177 if (TraceCMSState) { 2178 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT 2179 " exiting collection CMS state %d", 2180 Thread::current(), _collectorState); 2181 } 2182 return; 2183 } else { 2184 // The background collector can run but check to see if the 2185 // foreground collector has done a collection while the 2186 // background collector was waiting to get the CGC_lock 2187 // above. If yes, break so that _foregroundGCShouldWait 2188 // is cleared before returning. 
2189 if (_collectorState == Idling) { 2190 break; 2191 } 2192 } 2193 } 2194 2195 assert(_foregroundGCShouldWait, "Foreground collector, if active, " 2196 "should be waiting"); 2197 2198 switch (_collectorState) { 2199 case InitialMarking: 2200 { 2201 ReleaseForegroundGC x(this); 2202 stats().record_cms_begin(); 2203 2204 VM_CMS_Initial_Mark initial_mark_op(this); 2205 VMThread::execute(&initial_mark_op); 2206 } 2207 // The collector state may be any legal state at this point 2208 // since the background collector may have yielded to the 2209 // foreground collector. 2210 break; 2211 case Marking: 2212 // initial marking in checkpointRootsInitialWork has been completed 2213 if (markFromRoots(true)) { // we were successful 2214 assert(_collectorState == Precleaning, "Collector state should " 2215 "have changed"); 2216 } else { 2217 assert(_foregroundGCIsActive, "Internal state inconsistency"); 2218 } 2219 break; 2220 case Precleaning: 2221 if (UseAdaptiveSizePolicy) { 2222 size_policy()->concurrent_precleaning_begin(); 2223 } 2224 // marking from roots in markFromRoots has been completed 2225 preclean(); 2226 if (UseAdaptiveSizePolicy) { 2227 size_policy()->concurrent_precleaning_end(); 2228 } 2229 assert(_collectorState == AbortablePreclean || 2230 _collectorState == FinalMarking, 2231 "Collector state should have changed"); 2232 break; 2233 case AbortablePreclean: 2234 if (UseAdaptiveSizePolicy) { 2235 size_policy()->concurrent_phases_resume(); 2236 } 2237 abortable_preclean(); 2238 if (UseAdaptiveSizePolicy) { 2239 size_policy()->concurrent_precleaning_end(); 2240 } 2241 assert(_collectorState == FinalMarking, "Collector state should " 2242 "have changed"); 2243 break; 2244 case FinalMarking: 2245 { 2246 ReleaseForegroundGC x(this); 2247 2248 VM_CMS_Final_Remark final_remark_op(this); 2249 VMThread::execute(&final_remark_op); 2250 } 2251 assert(_foregroundGCShouldWait, "block post-condition"); 2252 break; 2253 case Sweeping: 2254 if (UseAdaptiveSizePolicy) { 2255 size_policy()->concurrent_sweeping_begin(); 2256 } 2257 // final marking in checkpointRootsFinal has been completed 2258 sweep(true); 2259 assert(_collectorState == Resizing, "Collector state change " 2260 "to Resizing must be done under the free_list_lock"); 2261 _full_gcs_since_conc_gc = 0; 2262 2263 // Stop the timers for adaptive size policy for the concurrent phases 2264 if (UseAdaptiveSizePolicy) { 2265 size_policy()->concurrent_sweeping_end(); 2266 size_policy()->concurrent_phases_end(gch->gc_cause(), 2267 gch->prev_gen(_cmsGen)->capacity(), 2268 _cmsGen->free()); 2269 } 2270 2271 case Resizing: { 2272 // Sweeping has been completed... 2273 // At this point the background collection has completed. 2274 // Don't move the call to compute_new_size() down 2275 // into code that might be executed if the background 2276 // collection was preempted. 2277 { 2278 ReleaseForegroundGC x(this); // unblock FG collection 2279 MutexLockerEx y(Heap_lock, Mutex::_no_safepoint_check_flag); 2280 CMSTokenSync z(true); // not strictly needed. 
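// Re-check the phase now that the locks are held again: a foreground
// collection may have taken over and completed the cycle (leaving the
// state at Idling) while ReleaseForegroundGC above was letting it run.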
2281 if (_collectorState == Resizing) { 2282 compute_new_size(); 2283 _collectorState = Resetting; 2284 } else { 2285 assert(_collectorState == Idling, "The state should only change" 2286 " because the foreground collector has finished the collection"); 2287 } 2288 } 2289 break; 2290 } 2291 case Resetting: 2292 // CMS heap resizing has been completed 2293 reset(true); 2294 assert(_collectorState == Idling, "Collector state should " 2295 "have changed"); 2296 stats().record_cms_end(); 2297 // Don't move the concurrent_phases_end() and compute_new_size() 2298 // calls to here because a preempted background collection 2299 // has it's state set to "Resetting". 2300 break; 2301 case Idling: 2302 default: 2303 ShouldNotReachHere(); 2304 break; 2305 } 2306 if (TraceCMSState) { 2307 gclog_or_tty->print_cr(" Thread " INTPTR_FORMAT " done - next CMS state %d", 2308 Thread::current(), _collectorState); 2309 } 2310 assert(_foregroundGCShouldWait, "block post-condition"); 2311 } 2312 2313 // Should this be in gc_epilogue? 2314 collector_policy()->counters()->update_counters(); 2315 2316 { 2317 // Clear _foregroundGCShouldWait and, in the event that the 2318 // foreground collector is waiting, notify it, before 2319 // returning. 2320 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); 2321 _foregroundGCShouldWait = false; 2322 if (_foregroundGCIsActive) { 2323 CGC_lock->notify(); 2324 } 2325 assert(!ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 2326 "Possible deadlock"); 2327 } 2328 if (TraceCMSState) { 2329 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT 2330 " exiting collection CMS state %d", 2331 Thread::current(), _collectorState); 2332 } 2333 if (PrintGC && Verbose) { 2334 _cmsGen->print_heap_change(prev_used); 2335 } 2336 } 2337 2338 void CMSCollector::collect_in_foreground(bool clear_all_soft_refs) { 2339 assert(_foregroundGCIsActive && !_foregroundGCShouldWait, 2340 "Foreground collector should be waiting, not executing"); 2341 assert(Thread::current()->is_VM_thread(), "A foreground collection" 2342 "may only be done by the VM Thread with the world stopped"); 2343 assert(ConcurrentMarkSweepThread::vm_thread_has_cms_token(), 2344 "VM thread should have CMS token"); 2345 2346 NOT_PRODUCT(TraceTime t("CMS:MS (foreground) ", PrintGCDetails && Verbose, 2347 true, gclog_or_tty);) 2348 if (UseAdaptiveSizePolicy) { 2349 size_policy()->ms_collection_begin(); 2350 } 2351 COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact); 2352 2353 HandleMark hm; // Discard invalid handles created during verification 2354 2355 if (VerifyBeforeGC && 2356 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) { 2357 Universe::verify(true); 2358 } 2359 2360 bool init_mark_was_synchronous = false; // until proven otherwise 2361 while (_collectorState != Idling) { 2362 if (TraceCMSState) { 2363 gclog_or_tty->print_cr("Thread " INTPTR_FORMAT " in CMS state %d", 2364 Thread::current(), _collectorState); 2365 } 2366 switch (_collectorState) { 2367 case InitialMarking: 2368 init_mark_was_synchronous = true; // fact to be exploited in re-mark 2369 checkpointRootsInitial(false); 2370 assert(_collectorState == Marking, "Collector state should have changed" 2371 " within checkpointRootsInitial()"); 2372 break; 2373 case Marking: 2374 // initial marking in checkpointRootsInitialWork has been completed 2375 if (VerifyDuringGC && 2376 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) { 2377 gclog_or_tty->print("Verify before initial mark: "); 2378 Universe::verify(true); 2379 } 
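// The braces below are presumably there to scope 'res': without them the
// initialized local would be in scope at the following case labels, and
// jumping past its initialization is not allowed in C++.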
2380 { 2381 bool res = markFromRoots(false); 2382 assert(res && _collectorState == FinalMarking, "Collector state should " 2383 "have changed"); 2384 break; 2385 } 2386 case FinalMarking: 2387 if (VerifyDuringGC && 2388 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) { 2389 gclog_or_tty->print("Verify before re-mark: "); 2390 Universe::verify(true); 2391 } 2392 checkpointRootsFinal(false, clear_all_soft_refs, 2393 init_mark_was_synchronous); 2394 assert(_collectorState == Sweeping, "Collector state should not " 2395 "have changed within checkpointRootsFinal()"); 2396 break; 2397 case Sweeping: 2398 // final marking in checkpointRootsFinal has been completed 2399 if (VerifyDuringGC && 2400 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) { 2401 gclog_or_tty->print("Verify before sweep: "); 2402 Universe::verify(true); 2403 } 2404 sweep(false); 2405 assert(_collectorState == Resizing, "Incorrect state"); 2406 break; 2407 case Resizing: { 2408 // Sweeping has been completed; the actual resize in this case 2409 // is done separately; nothing to be done in this state. 2410 _collectorState = Resetting; 2411 break; 2412 } 2413 case Resetting: 2414 // The heap has been resized. 2415 if (VerifyDuringGC && 2416 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) { 2417 gclog_or_tty->print("Verify before reset: "); 2418 Universe::verify(true); 2419 } 2420 reset(false); 2421 assert(_collectorState == Idling, "Collector state should " 2422 "have changed"); 2423 break; 2424 case Precleaning: 2425 case AbortablePreclean: 2426 // Elide the preclean phase 2427 _collectorState = FinalMarking; 2428 break; 2429 default: 2430 ShouldNotReachHere(); 2431 } 2432 if (TraceCMSState) { 2433 gclog_or_tty->print_cr(" Thread " INTPTR_FORMAT " done - next CMS state %d", 2434 Thread::current(), _collectorState); 2435 } 2436 } 2437 2438 if (UseAdaptiveSizePolicy) { 2439 GenCollectedHeap* gch = GenCollectedHeap::heap(); 2440 size_policy()->ms_collection_end(gch->gc_cause()); 2441 } 2442 2443 if (VerifyAfterGC && 2444 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) { 2445 Universe::verify(true); 2446 } 2447 if (TraceCMSState) { 2448 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT 2449 " exiting collection CMS state %d", 2450 Thread::current(), _collectorState); 2451 } 2452 } 2453 2454 bool CMSCollector::waitForForegroundGC() { 2455 bool res = false; 2456 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 2457 "CMS thread should have CMS token"); 2458 // Block the foreground collector until the 2459 // background collectors decides whether to 2460 // yield. 2461 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); 2462 _foregroundGCShouldWait = true; 2463 if (_foregroundGCIsActive) { 2464 // The background collector yields to the 2465 // foreground collector and returns a value 2466 // indicating that it has yielded. The foreground 2467 // collector can proceed. 
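// Yield: relinquish the CMS token (recording that we still want it back),
// wake the foreground collector, and block here until it has finished.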
2468 res = true;
2469 _foregroundGCShouldWait = false;
2470 ConcurrentMarkSweepThread::clear_CMS_flag(
2471 ConcurrentMarkSweepThread::CMS_cms_has_token);
2472 ConcurrentMarkSweepThread::set_CMS_flag(
2473 ConcurrentMarkSweepThread::CMS_cms_wants_token);
2474 // Get a possibly blocked foreground thread going
2475 CGC_lock->notify();
2476 if (TraceCMSState) {
2477 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT " waiting at CMS state %d",
2478 Thread::current(), _collectorState);
2479 }
2480 while (_foregroundGCIsActive) {
2481 CGC_lock->wait(Mutex::_no_safepoint_check_flag);
2482 }
2483 ConcurrentMarkSweepThread::set_CMS_flag(
2484 ConcurrentMarkSweepThread::CMS_cms_has_token);
2485 ConcurrentMarkSweepThread::clear_CMS_flag(
2486 ConcurrentMarkSweepThread::CMS_cms_wants_token);
2487 }
2488 if (TraceCMSState) {
2489 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT " continuing at CMS state %d",
2490 Thread::current(), _collectorState);
2491 }
2492 return res;
2493 }
2494
2495 // Because of the need to lock the free lists and other structures in
2496 // the collector, common to all the generations that the collector is
2497 // collecting, we need the gc_prologues of individual CMS generations
2498 // to delegate to their collector. It may have been simpler had the
2499 // current infrastructure allowed one to call a prologue on a
2500 // collector. In the absence of that we have the generation's
2501 // prologue delegate to the collector, which delegates back
2502 // some "local" work to a worker method in the individual generations
2503 // that it's responsible for collecting, while itself doing any
2504 // work common to all generations it's responsible for. A similar
2505 // comment applies to the gc_epilogue()'s.
2506 // The role of the variable _between_prologue_and_epilogue is to
2507 // enforce the invocation protocol.
2508 void CMSCollector::gc_prologue(bool full) {
2509 // Call gc_prologue_work() for each CMSGen and PermGen that
2510 // we are responsible for.
2511
2512 // The following locking discipline assumes that we are only called
2513 // when the world is stopped.
2514 assert(SafepointSynchronize::is_at_safepoint(), "world is stopped assumption");
2515
2516 // The CMSCollector prologue must call the gc_prologues for the
2517 // "generations" (including PermGen if any) that it's responsible
2518 // for.
2519
2520 assert( Thread::current()->is_VM_thread()
2521 || ( CMSScavengeBeforeRemark
2522 && Thread::current()->is_ConcurrentGC_thread()),
2523 "Incorrect thread type for prologue execution");
2524
2525 if (_between_prologue_and_epilogue) {
2526 // We have already been invoked; this is a gc_prologue delegation
2527 // from yet another CMS generation that we are responsible for, just
2528 // ignore it since all relevant work has already been done.
2529 return;
2530 }
2531
2532 // set a bit saying prologue has been called; cleared in epilogue
2533 _between_prologue_and_epilogue = true;
2534 // Claim locks for common data structures, then call gc_prologue_work()
2535 // for each CMSGen and PermGen that we are responsible for.
2536
2537 getFreelistLocks(); // gets free list locks on constituent spaces
2538 bitMapLock()->lock_without_safepoint_check();
2539
2540 // Should call gc_prologue_work() for all cms gens we are responsible for
2541 bool registerClosure = _collectorState >= Marking
2542 && _collectorState < Sweeping;
2543 ModUnionClosure* muc = ParallelGCThreads > 0 ?
&_modUnionClosurePar 2544 : &_modUnionClosure; 2545 _cmsGen->gc_prologue_work(full, registerClosure, muc); 2546 _permGen->gc_prologue_work(full, registerClosure, muc); 2547 2548 if (!full) { 2549 stats().record_gc0_begin(); 2550 } 2551 } 2552 2553 void ConcurrentMarkSweepGeneration::gc_prologue(bool full) { 2554 // Delegate to CMScollector which knows how to coordinate between 2555 // this and any other CMS generations that it is responsible for 2556 // collecting. 2557 collector()->gc_prologue(full); 2558 } 2559 2560 // This is a "private" interface for use by this generation's CMSCollector. 2561 // Not to be called directly by any other entity (for instance, 2562 // GenCollectedHeap, which calls the "public" gc_prologue method above). 2563 void ConcurrentMarkSweepGeneration::gc_prologue_work(bool full, 2564 bool registerClosure, ModUnionClosure* modUnionClosure) { 2565 assert(!incremental_collection_failed(), "Shouldn't be set yet"); 2566 assert(cmsSpace()->preconsumptionDirtyCardClosure() == NULL, 2567 "Should be NULL"); 2568 if (registerClosure) { 2569 cmsSpace()->setPreconsumptionDirtyCardClosure(modUnionClosure); 2570 } 2571 cmsSpace()->gc_prologue(); 2572 // Clear stat counters 2573 NOT_PRODUCT( 2574 assert(_numObjectsPromoted == 0, "check"); 2575 assert(_numWordsPromoted == 0, "check"); 2576 if (Verbose && PrintGC) { 2577 gclog_or_tty->print("Allocated "SIZE_FORMAT" objects, " 2578 SIZE_FORMAT" bytes concurrently", 2579 _numObjectsAllocated, _numWordsAllocated*sizeof(HeapWord)); 2580 } 2581 _numObjectsAllocated = 0; 2582 _numWordsAllocated = 0; 2583 ) 2584 } 2585 2586 void CMSCollector::gc_epilogue(bool full) { 2587 // The following locking discipline assumes that we are only called 2588 // when the world is stopped. 2589 assert(SafepointSynchronize::is_at_safepoint(), 2590 "world is stopped assumption"); 2591 2592 // Currently the CMS epilogue (see CompactibleFreeListSpace) merely checks 2593 // if linear allocation blocks need to be appropriately marked to allow the 2594 // the blocks to be parsable. We also check here whether we need to nudge the 2595 // CMS collector thread to start a new cycle (if it's not already active). 2596 assert( Thread::current()->is_VM_thread() 2597 || ( CMSScavengeBeforeRemark 2598 && Thread::current()->is_ConcurrentGC_thread()), 2599 "Incorrect thread type for epilogue execution"); 2600 2601 if (!_between_prologue_and_epilogue) { 2602 // We have already been invoked; this is a gc_epilogue delegation 2603 // from yet another CMS generation that we are responsible for, just 2604 // ignore it since all relevant work has already been done. 2605 return; 2606 } 2607 assert(haveFreelistLocks(), "must have freelist locks"); 2608 assert_lock_strong(bitMapLock()); 2609 2610 _cmsGen->gc_epilogue_work(full); 2611 _permGen->gc_epilogue_work(full); 2612 2613 if (_collectorState == AbortablePreclean || _collectorState == Precleaning) { 2614 // in case sampling was not already enabled, enable it 2615 _start_sampling = true; 2616 } 2617 // reset _eden_chunk_array so sampling starts afresh 2618 _eden_chunk_index = 0; 2619 2620 size_t cms_used = _cmsGen->cmsSpace()->used(); 2621 size_t perm_used = _permGen->cmsSpace()->used(); 2622 2623 // update performance counters - this uses a special version of 2624 // update_counters() that allows the utilization to be passed as a 2625 // parameter, avoiding multiple calls to used(). 
2626 // 2627 _cmsGen->update_counters(cms_used); 2628 _permGen->update_counters(perm_used); 2629 2630 if (CMSIncrementalMode) { 2631 icms_update_allocation_limits(); 2632 } 2633 2634 bitMapLock()->unlock(); 2635 releaseFreelistLocks(); 2636 2637 _between_prologue_and_epilogue = false; // ready for next cycle 2638 } 2639 2640 void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) { 2641 collector()->gc_epilogue(full); 2642 2643 // Also reset promotion tracking in par gc thread states. 2644 if (ParallelGCThreads > 0) { 2645 for (uint i = 0; i < ParallelGCThreads; i++) { 2646 _par_gc_thread_states[i]->promo.stopTrackingPromotions(); 2647 } 2648 } 2649 } 2650 2651 void ConcurrentMarkSweepGeneration::gc_epilogue_work(bool full) { 2652 assert(!incremental_collection_failed(), "Should have been cleared"); 2653 cmsSpace()->setPreconsumptionDirtyCardClosure(NULL); 2654 cmsSpace()->gc_epilogue(); 2655 // Print stat counters 2656 NOT_PRODUCT( 2657 assert(_numObjectsAllocated == 0, "check"); 2658 assert(_numWordsAllocated == 0, "check"); 2659 if (Verbose && PrintGC) { 2660 gclog_or_tty->print("Promoted "SIZE_FORMAT" objects, " 2661 SIZE_FORMAT" bytes", 2662 _numObjectsPromoted, _numWordsPromoted*sizeof(HeapWord)); 2663 } 2664 _numObjectsPromoted = 0; 2665 _numWordsPromoted = 0; 2666 ) 2667 2668 if (PrintGC && Verbose) { 2669 // Call down the chain in contiguous_available needs the freelistLock 2670 // so print this out before releasing the freeListLock. 2671 gclog_or_tty->print(" Contiguous available "SIZE_FORMAT" bytes ", 2672 contiguous_available()); 2673 } 2674 } 2675 2676 #ifndef PRODUCT 2677 bool CMSCollector::have_cms_token() { 2678 Thread* thr = Thread::current(); 2679 if (thr->is_VM_thread()) { 2680 return ConcurrentMarkSweepThread::vm_thread_has_cms_token(); 2681 } else if (thr->is_ConcurrentGC_thread()) { 2682 return ConcurrentMarkSweepThread::cms_thread_has_cms_token(); 2683 } else if (thr->is_GC_task_thread()) { 2684 return ConcurrentMarkSweepThread::vm_thread_has_cms_token() && 2685 ParGCRareEvent_lock->owned_by_self(); 2686 } 2687 return false; 2688 } 2689 #endif 2690 2691 // Check reachability of the given heap address in CMS generation, 2692 // treating all other generations as roots. 2693 bool CMSCollector::is_cms_reachable(HeapWord* addr) { 2694 // We could "guarantee" below, rather than assert, but i'll 2695 // leave these as "asserts" so that an adventurous debugger 2696 // could try this in the product build provided some subset of 2697 // the conditions were met, provided they were intersted in the 2698 // results and knew that the computation below wouldn't interfere 2699 // with other concurrent computations mutating the structures 2700 // being read or written. 2701 assert(SafepointSynchronize::is_at_safepoint(), 2702 "Else mutations in object graph will make answer suspect"); 2703 assert(have_cms_token(), "Should hold cms token"); 2704 assert(haveFreelistLocks(), "must hold free list locks"); 2705 assert_lock_strong(bitMapLock()); 2706 2707 // Clear the marking bit map array before starting, but, just 2708 // for kicks, first report if the given address is already marked 2709 gclog_or_tty->print_cr("Start: Address 0x%x is%s marked", addr, 2710 _markBitMap.isMarked(addr) ? "" : " not"); 2711 2712 if (verify_after_remark()) { 2713 MutexLockerEx x(verification_mark_bm()->lock(), Mutex::_no_safepoint_check_flag); 2714 bool result = verification_mark_bm()->isMarked(addr); 2715 gclog_or_tty->print_cr("TransitiveMark: Address 0x%x %s marked", addr, 2716 result ? 
"IS" : "is NOT"); 2717 return result; 2718 } else { 2719 gclog_or_tty->print_cr("Could not compute result"); 2720 return false; 2721 } 2722 } 2723 2724 //////////////////////////////////////////////////////// 2725 // CMS Verification Support 2726 //////////////////////////////////////////////////////// 2727 // Following the remark phase, the following invariant 2728 // should hold -- each object in the CMS heap which is 2729 // marked in markBitMap() should be marked in the verification_mark_bm(). 2730 2731 class VerifyMarkedClosure: public BitMapClosure { 2732 CMSBitMap* _marks; 2733 bool _failed; 2734 2735 public: 2736 VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {} 2737 2738 void do_bit(size_t offset) { 2739 HeapWord* addr = _marks->offsetToHeapWord(offset); 2740 if (!_marks->isMarked(addr)) { 2741 oop(addr)->print(); 2742 gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr); 2743 _failed = true; 2744 } 2745 } 2746 2747 bool failed() { return _failed; } 2748 }; 2749 2750 bool CMSCollector::verify_after_remark() { 2751 gclog_or_tty->print(" [Verifying CMS Marking... "); 2752 MutexLockerEx ml(verification_mark_bm()->lock(), Mutex::_no_safepoint_check_flag); 2753 static bool init = false; 2754 2755 assert(SafepointSynchronize::is_at_safepoint(), 2756 "Else mutations in object graph will make answer suspect"); 2757 assert(have_cms_token(), 2758 "Else there may be mutual interference in use of " 2759 " verification data structures"); 2760 assert(_collectorState > Marking && _collectorState <= Sweeping, 2761 "Else marking info checked here may be obsolete"); 2762 assert(haveFreelistLocks(), "must hold free list locks"); 2763 assert_lock_strong(bitMapLock()); 2764 2765 2766 // Allocate marking bit map if not already allocated 2767 if (!init) { // first time 2768 if (!verification_mark_bm()->allocate(_span)) { 2769 return false; 2770 } 2771 init = true; 2772 } 2773 2774 assert(verification_mark_stack()->isEmpty(), "Should be empty"); 2775 2776 // Turn off refs discovery -- so we will be tracing through refs. 2777 // This is as intended, because by this time 2778 // GC must already have cleared any refs that need to be cleared, 2779 // and traced those that need to be marked; moreover, 2780 // the marking done here is not going to intefere in any 2781 // way with the marking information used by GC. 2782 NoRefDiscovery no_discovery(ref_processor()); 2783 2784 COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;) 2785 2786 // Clear any marks from a previous round 2787 verification_mark_bm()->clear_all(); 2788 assert(verification_mark_stack()->isEmpty(), "markStack should be empty"); 2789 assert(overflow_list_is_empty(), "overflow list should be empty"); 2790 2791 GenCollectedHeap* gch = GenCollectedHeap::heap(); 2792 gch->ensure_parsability(false); // fill TLABs, but no need to retire them 2793 // Update the saved marks which may affect the root scans. 2794 gch->save_marks(); 2795 2796 if (CMSRemarkVerifyVariant == 1) { 2797 // In this first variant of verification, we complete 2798 // all marking, then check if the new marks-verctor is 2799 // a subset of the CMS marks-vector. 2800 verify_after_remark_work_1(); 2801 } else if (CMSRemarkVerifyVariant == 2) { 2802 // In this second variant of verification, we flag an error 2803 // (i.e. 
an object reachable in the new marks-vector not reachable 2804 // in the CMS marks-vector) immediately, also indicating the 2805 // identify of an object (A) that references the unmarked object (B) -- 2806 // presumably, a mutation to A failed to be picked up by preclean/remark? 2807 verify_after_remark_work_2(); 2808 } else { 2809 warning("Unrecognized value %d for CMSRemarkVerifyVariant", 2810 CMSRemarkVerifyVariant); 2811 } 2812 gclog_or_tty->print(" done] "); 2813 return true; 2814 } 2815 2816 void CMSCollector::verify_after_remark_work_1() { 2817 ResourceMark rm; 2818 HandleMark hm; 2819 GenCollectedHeap* gch = GenCollectedHeap::heap(); 2820 2821 // Mark from roots one level into CMS 2822 MarkRefsIntoClosure notOlder(_span, verification_mark_bm(), true /* nmethods */); 2823 gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel. 2824 2825 gch->gen_process_strong_roots(_cmsGen->level(), 2826 true, // younger gens are roots 2827 true, // collecting perm gen 2828 SharedHeap::ScanningOption(roots_scanning_options()), 2829 NULL, ¬Older); 2830 2831 // Now mark from the roots 2832 assert(_revisitStack.isEmpty(), "Should be empty"); 2833 MarkFromRootsClosure markFromRootsClosure(this, _span, 2834 verification_mark_bm(), verification_mark_stack(), &_revisitStack, 2835 false /* don't yield */, true /* verifying */); 2836 assert(_restart_addr == NULL, "Expected pre-condition"); 2837 verification_mark_bm()->iterate(&markFromRootsClosure); 2838 while (_restart_addr != NULL) { 2839 // Deal with stack overflow: by restarting at the indicated 2840 // address. 2841 HeapWord* ra = _restart_addr; 2842 markFromRootsClosure.reset(ra); 2843 _restart_addr = NULL; 2844 verification_mark_bm()->iterate(&markFromRootsClosure, ra, _span.end()); 2845 } 2846 assert(verification_mark_stack()->isEmpty(), "Should have been drained"); 2847 verify_work_stacks_empty(); 2848 // Should reset the revisit stack above, since no class tree 2849 // surgery is forthcoming. 2850 _revisitStack.reset(); // throwing away all contents 2851 2852 // Marking completed -- now verify that each bit marked in 2853 // verification_mark_bm() is also marked in markBitMap(); flag all 2854 // errors by printing corresponding objects. 2855 VerifyMarkedClosure vcl(markBitMap()); 2856 verification_mark_bm()->iterate(&vcl); 2857 if (vcl.failed()) { 2858 gclog_or_tty->print("Verification failed"); 2859 Universe::heap()->print(); 2860 fatal(" ... aborting"); 2861 } 2862 } 2863 2864 void CMSCollector::verify_after_remark_work_2() { 2865 ResourceMark rm; 2866 HandleMark hm; 2867 GenCollectedHeap* gch = GenCollectedHeap::heap(); 2868 2869 // Mark from roots one level into CMS 2870 MarkRefsIntoVerifyClosure notOlder(_span, verification_mark_bm(), 2871 markBitMap(), true /* nmethods */); 2872 gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel. 2873 gch->gen_process_strong_roots(_cmsGen->level(), 2874 true, // younger gens are roots 2875 true, // collecting perm gen 2876 SharedHeap::ScanningOption(roots_scanning_options()), 2877 NULL, ¬Older); 2878 2879 // Now mark from the roots 2880 assert(_revisitStack.isEmpty(), "Should be empty"); 2881 MarkFromRootsVerifyClosure markFromRootsClosure(this, _span, 2882 verification_mark_bm(), markBitMap(), verification_mark_stack()); 2883 assert(_restart_addr == NULL, "Expected pre-condition"); 2884 verification_mark_bm()->iterate(&markFromRootsClosure); 2885 while (_restart_addr != NULL) { 2886 // Deal with stack overflow: by restarting at the indicated 2887 // address. 
2888 HeapWord* ra = _restart_addr; 2889 markFromRootsClosure.reset(ra); 2890 _restart_addr = NULL; 2891 verification_mark_bm()->iterate(&markFromRootsClosure, ra, _span.end()); 2892 } 2893 assert(verification_mark_stack()->isEmpty(), "Should have been drained"); 2894 verify_work_stacks_empty(); 2895 // Should reset the revisit stack above, since no class tree 2896 // surgery is forthcoming. 2897 _revisitStack.reset(); // throwing away all contents 2898 2899 // Marking completed -- now verify that each bit marked in 2900 // verification_mark_bm() is also marked in markBitMap(); flag all 2901 // errors by printing corresponding objects. 2902 VerifyMarkedClosure vcl(markBitMap()); 2903 verification_mark_bm()->iterate(&vcl); 2904 assert(!vcl.failed(), "Else verification above should not have succeeded"); 2905 } 2906 2907 void ConcurrentMarkSweepGeneration::save_marks() { 2908 // delegate to CMS space 2909 cmsSpace()->save_marks(); 2910 for (uint i = 0; i < ParallelGCThreads; i++) { 2911 _par_gc_thread_states[i]->promo.startTrackingPromotions(); 2912 } 2913 } 2914 2915 bool ConcurrentMarkSweepGeneration::no_allocs_since_save_marks() { 2916 return cmsSpace()->no_allocs_since_save_marks(); 2917 } 2918 2919 #define CMS_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \ 2920 \ 2921 void ConcurrentMarkSweepGeneration:: \ 2922 oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \ 2923 cl->set_generation(this); \ 2924 cmsSpace()->oop_since_save_marks_iterate##nv_suffix(cl); \ 2925 cl->reset_generation(); \ 2926 save_marks(); \ 2927 } 2928 2929 ALL_SINCE_SAVE_MARKS_CLOSURES(CMS_SINCE_SAVE_MARKS_DEFN) 2930 2931 void 2932 ConcurrentMarkSweepGeneration::object_iterate_since_last_GC(ObjectClosure* blk) 2933 { 2934 // Not currently implemented; need to do the following. -- ysr. 2935 // dld -- I think that is used for some sort of allocation profiler. So it 2936 // really means the objects allocated by the mutator since the last 2937 // GC. We could potentially implement this cheaply by recording only 2938 // the direct allocations in a side data structure. 2939 // 2940 // I think we probably ought not to be required to support these 2941 // iterations at any arbitrary point; I think there ought to be some 2942 // call to enable/disable allocation profiling in a generation/space, 2943 // and the iterator ought to return the objects allocated in the 2944 // gen/space since the enable call, or the last iterator call (which 2945 // will probably be at a GC.) That way, for gens like CM&S that would 2946 // require some extra data structure to support this, we only pay the 2947 // cost when it's in use... 
2948 cmsSpace()->object_iterate_since_last_GC(blk); 2949 } 2950 2951 void 2952 ConcurrentMarkSweepGeneration::younger_refs_iterate(OopsInGenClosure* cl) { 2953 cl->set_generation(this); 2954 younger_refs_in_space_iterate(_cmsSpace, cl); 2955 cl->reset_generation(); 2956 } 2957 2958 void 2959 ConcurrentMarkSweepGeneration::oop_iterate(MemRegion mr, OopClosure* cl) { 2960 if (freelistLock()->owned_by_self()) { 2961 Generation::oop_iterate(mr, cl); 2962 } else { 2963 MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag); 2964 Generation::oop_iterate(mr, cl); 2965 } 2966 } 2967 2968 void 2969 ConcurrentMarkSweepGeneration::oop_iterate(OopClosure* cl) { 2970 if (freelistLock()->owned_by_self()) { 2971 Generation::oop_iterate(cl); 2972 } else { 2973 MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag); 2974 Generation::oop_iterate(cl); 2975 } 2976 } 2977 2978 void 2979 ConcurrentMarkSweepGeneration::object_iterate(ObjectClosure* cl) { 2980 if (freelistLock()->owned_by_self()) { 2981 Generation::object_iterate(cl); 2982 } else { 2983 MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag); 2984 Generation::object_iterate(cl); 2985 } 2986 } 2987 2988 void 2989 ConcurrentMarkSweepGeneration::pre_adjust_pointers() { 2990 } 2991 2992 void 2993 ConcurrentMarkSweepGeneration::post_compact() { 2994 } 2995 2996 void 2997 ConcurrentMarkSweepGeneration::prepare_for_verify() { 2998 // Fix the linear allocation blocks to look like free blocks. 2999 3000 // Locks are normally acquired/released in gc_prologue/gc_epilogue, but those 3001 // are not called when the heap is verified during universe initialization and 3002 // at vm shutdown. 3003 if (freelistLock()->owned_by_self()) { 3004 cmsSpace()->prepare_for_verify(); 3005 } else { 3006 MutexLockerEx fll(freelistLock(), Mutex::_no_safepoint_check_flag); 3007 cmsSpace()->prepare_for_verify(); 3008 } 3009 } 3010 3011 void 3012 ConcurrentMarkSweepGeneration::verify(bool allow_dirty /* ignored */) { 3013 // Locks are normally acquired/released in gc_prologue/gc_epilogue, but those 3014 // are not called when the heap is verified during universe initialization and 3015 // at vm shutdown. 3016 if (freelistLock()->owned_by_self()) { 3017 cmsSpace()->verify(false /* ignored */); 3018 } else { 3019 MutexLockerEx fll(freelistLock(), Mutex::_no_safepoint_check_flag); 3020 cmsSpace()->verify(false /* ignored */); 3021 } 3022 } 3023 3024 void CMSCollector::verify(bool allow_dirty /* ignored */) { 3025 _cmsGen->verify(allow_dirty); 3026 _permGen->verify(allow_dirty); 3027 } 3028 3029 #ifndef PRODUCT 3030 bool CMSCollector::overflow_list_is_empty() const { 3031 assert(_num_par_pushes >= 0, "Inconsistency"); 3032 if (_overflow_list == NULL) { 3033 assert(_num_par_pushes == 0, "Inconsistency"); 3034 } 3035 return _overflow_list == NULL; 3036 } 3037 3038 // The methods verify_work_stacks_empty() and verify_overflow_empty() 3039 // merely consolidate assertion checks that appear to occur together frequently. 
3040 void CMSCollector::verify_work_stacks_empty() const { 3041 assert(_markStack.isEmpty(), "Marking stack should be empty"); 3042 assert(overflow_list_is_empty(), "Overflow list should be empty"); 3043 } 3044 3045 void CMSCollector::verify_overflow_empty() const { 3046 assert(overflow_list_is_empty(), "Overflow list should be empty"); 3047 assert(no_preserved_marks(), "No preserved marks"); 3048 } 3049 #endif // PRODUCT 3050 3051 void CMSCollector::setup_cms_unloading_and_verification_state() { 3052 const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC 3053 || VerifyBeforeExit; 3054 const int rso = SharedHeap::SO_Symbols | SharedHeap::SO_Strings 3055 | SharedHeap::SO_CodeCache; 3056 3057 if (cms_should_unload_classes()) { // Should unload classes this cycle 3058 remove_root_scanning_option(rso); // Shrink the root set appropriately 3059 set_verifying(should_verify); // Set verification state for this cycle 3060 return; // Nothing else needs to be done at this time 3061 } 3062 3063 // Not unloading classes this cycle 3064 assert(!cms_should_unload_classes(), "Inconsitency!"); 3065 if ((!verifying() || cms_unloaded_classes_last_cycle()) && should_verify) { 3066 // We were not verifying, or we _were_ unloading classes in the last cycle, 3067 // AND some verification options are enabled this cycle; in this case, 3068 // we must make sure that the deadness map is allocated if not already so, 3069 // and cleared (if already allocated previously -- 3070 // CMSBitMap::sizeInBits() is used to determine if it's allocated). 3071 if (perm_gen_verify_bit_map()->sizeInBits() == 0) { 3072 if (!perm_gen_verify_bit_map()->allocate(_permGen->reserved())) { 3073 warning("Failed to allocate permanent generation verification CMS Bit Map;\n" 3074 "permanent generation verification disabled"); 3075 return; // Note that we leave verification disabled, so we'll retry this 3076 // allocation next cycle. We _could_ remember this failure 3077 // and skip further attempts and permanently disable verification 3078 // attempts if that is considered more desirable. 3079 } 3080 assert(perm_gen_verify_bit_map()->covers(_permGen->reserved()), 3081 "_perm_gen_ver_bit_map inconsistency?"); 3082 } else { 3083 perm_gen_verify_bit_map()->clear_all(); 3084 } 3085 // Include symbols, strings and code cache elements to prevent their resurrection. 3086 add_root_scanning_option(rso); 3087 set_verifying(true); 3088 } else if (verifying() && !should_verify) { 3089 // We were verifying, but some verification flags got disabled. 3090 set_verifying(false); 3091 // Exclude symbols, strings and code cache elements from root scanning to 3092 // reduce IM and RM pauses. 
3093 remove_root_scanning_option(rso); 3094 } 3095 } 3096 3097 3098 #ifndef PRODUCT 3099 HeapWord* CMSCollector::block_start(const void* p) const { 3100 const HeapWord* addr = (HeapWord*)p; 3101 if (_span.contains(p)) { 3102 if (_cmsGen->cmsSpace()->is_in_reserved(addr)) { 3103 return _cmsGen->cmsSpace()->block_start(p); 3104 } else { 3105 assert(_permGen->cmsSpace()->is_in_reserved(addr), 3106 "Inconsistent _span?"); 3107 return _permGen->cmsSpace()->block_start(p); 3108 } 3109 } 3110 return NULL; 3111 } 3112 #endif 3113 3114 HeapWord* 3115 ConcurrentMarkSweepGeneration::expand_and_allocate(size_t word_size, 3116 bool tlab, 3117 bool parallel) { 3118 assert(!tlab, "Can't deal with TLAB allocation"); 3119 MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag); 3120 expand(word_size*HeapWordSize, MinHeapDeltaBytes, 3121 CMSExpansionCause::_satisfy_allocation); 3122 if (GCExpandToAllocateDelayMillis > 0) { 3123 os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false); 3124 } 3125 return have_lock_and_allocate(word_size, tlab); 3126 } 3127 3128 // YSR: All of this generation expansion/shrinking stuff is an exact copy of 3129 // OneContigSpaceCardGeneration, which makes me wonder if we should move this 3130 // to CardGeneration and share it... 3131 void ConcurrentMarkSweepGeneration::expand(size_t bytes, size_t expand_bytes, 3132 CMSExpansionCause::Cause cause) 3133 { 3134 assert_locked_or_safepoint(Heap_lock); 3135 3136 size_t aligned_bytes = ReservedSpace::page_align_size_up(bytes); 3137 size_t aligned_expand_bytes = ReservedSpace::page_align_size_up(expand_bytes); 3138 bool success = false; 3139 if (aligned_expand_bytes > aligned_bytes) { 3140 success = grow_by(aligned_expand_bytes); 3141 } 3142 if (!success) { 3143 success = grow_by(aligned_bytes); 3144 } 3145 if (!success) { 3146 size_t remaining_bytes = _virtual_space.uncommitted_size(); 3147 if (remaining_bytes > 0) { 3148 success = grow_by(remaining_bytes); 3149 } 3150 } 3151 if (GC_locker::is_active()) { 3152 if (PrintGC && Verbose) { 3153 gclog_or_tty->print_cr("Garbage collection disabled, expanded heap instead"); 3154 } 3155 } 3156 // remember why we expanded; this information is used 3157 // by shouldConcurrentCollect() when making decisions on whether to start 3158 // a new CMS cycle. 3159 if (success) { 3160 set_expansion_cause(cause); 3161 if (PrintGCDetails && Verbose) { 3162 gclog_or_tty->print_cr("Expanded CMS gen for %s", 3163 CMSExpansionCause::to_string(cause)); 3164 } 3165 } 3166 } 3167 3168 HeapWord* ConcurrentMarkSweepGeneration::expand_and_par_lab_allocate(CMSParGCThreadState* ps, size_t word_sz) { 3169 HeapWord* res = NULL; 3170 MutexLocker x(ParGCRareEvent_lock); 3171 while (true) { 3172 // Expansion by some other thread might make alloc OK now: 3173 res = ps->lab.alloc(word_sz); 3174 if (res != NULL) return res; 3175 // If there's not enough expansion space available, give up. 3176 if (_virtual_space.uncommitted_size() < (word_sz * HeapWordSize)) { 3177 return NULL; 3178 } 3179 // Otherwise, we try expansion. 3180 expand(word_sz*HeapWordSize, MinHeapDeltaBytes, 3181 CMSExpansionCause::_allocate_par_lab); 3182 // Now go around the loop and try alloc again; 3183 // A competing par_promote might beat us to the expansion space, 3184 // so we may go around the loop again if promotion fails agaion. 
3185 if (GCExpandToAllocateDelayMillis > 0) { 3186 os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false); 3187 } 3188 } 3189 } 3190 3191 3192 bool ConcurrentMarkSweepGeneration::expand_and_ensure_spooling_space( 3193 PromotionInfo* promo) { 3194 MutexLocker x(ParGCRareEvent_lock); 3195 size_t refill_size_bytes = promo->refillSize() * HeapWordSize; 3196 while (true) { 3197 // Expansion by some other thread might make alloc OK now: 3198 if (promo->ensure_spooling_space()) { 3199 assert(promo->has_spooling_space(), 3200 "Post-condition of successful ensure_spooling_space()"); 3201 return true; 3202 } 3203 // If there's not enough expansion space available, give up. 3204 if (_virtual_space.uncommitted_size() < refill_size_bytes) { 3205 return false; 3206 } 3207 // Otherwise, we try expansion. 3208 expand(refill_size_bytes, MinHeapDeltaBytes, 3209 CMSExpansionCause::_allocate_par_spooling_space); 3210 // Now go around the loop and try alloc again; 3211 // A competing allocation might beat us to the expansion space, 3212 // so we may go around the loop again if allocation fails again. 3213 if (GCExpandToAllocateDelayMillis > 0) { 3214 os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false); 3215 } 3216 } 3217 } 3218 3219 3220 3221 void ConcurrentMarkSweepGeneration::shrink(size_t bytes) { 3222 assert_locked_or_safepoint(Heap_lock); 3223 size_t size = ReservedSpace::page_align_size_down(bytes); 3224 if (size > 0) { 3225 shrink_by(size); 3226 } 3227 } 3228 3229 bool ConcurrentMarkSweepGeneration::grow_by(size_t bytes) { 3230 assert_locked_or_safepoint(Heap_lock); 3231 bool result = _virtual_space.expand_by(bytes); 3232 if (result) { 3233 HeapWord* old_end = _cmsSpace->end(); 3234 size_t new_word_size = 3235 heap_word_size(_virtual_space.committed_size()); 3236 MemRegion mr(_cmsSpace->bottom(), new_word_size); 3237 _bts->resize(new_word_size); // resize the block offset shared array 3238 Universe::heap()->barrier_set()->resize_covered_region(mr); 3239 // Hmmmm... why doesn't CFLS::set_end verify locking? 3240 // This is quite ugly; FIX ME XXX 3241 _cmsSpace->assert_locked(); 3242 _cmsSpace->set_end((HeapWord*)_virtual_space.high()); 3243 3244 // update the space and generation capacity counters 3245 if (UsePerfData) { 3246 _space_counters->update_capacity(); 3247 _gen_counters->update_all(); 3248 } 3249 3250 if (Verbose && PrintGC) { 3251 size_t new_mem_size = _virtual_space.committed_size(); 3252 size_t old_mem_size = new_mem_size - bytes; 3253 gclog_or_tty->print_cr("Expanding %s from %ldK by %ldK to %ldK", 3254 name(), old_mem_size/K, bytes/K, new_mem_size/K); 3255 } 3256 } 3257 return result; 3258 } 3259 3260 bool ConcurrentMarkSweepGeneration::grow_to_reserved() { 3261 assert_locked_or_safepoint(Heap_lock); 3262 bool success = true; 3263 const size_t remaining_bytes = _virtual_space.uncommitted_size(); 3264 if (remaining_bytes > 0) { 3265 success = grow_by(remaining_bytes); 3266 DEBUG_ONLY(if (!success) warning("grow to reserved failed");) 3267 } 3268 return success; 3269 } 3270 3271 void ConcurrentMarkSweepGeneration::shrink_by(size_t bytes) { 3272 assert_locked_or_safepoint(Heap_lock); 3273 assert_lock_strong(freelistLock()); 3274 // XXX Fix when compaction is implemented. 3275 warning("Shrinking of CMS not yet implemented"); 3276 return; 3277 } 3278 3279 3280 // Simple ctor/dtor wrapper for accounting & timer chores around concurrent 3281 // phases. 
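// Illustrative sketch of the intended stack-scoped (RAII) use of the wrapper
// declared below; the real call sites appear later in this file, e.g. in
// markFromRoots() and preclean(). Timing starts in the constructor and the
// "[<gen>-concurrent-<phase>: ...]" log line is emitted from the destructor
// when the enclosing scope is exited.
#if 0
  {
    TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
    CMSPhaseAccounting pa(this, "mark", !PrintGCDetails);
    // ... do the work of the concurrent phase; accounting and logging
    // happen automatically in pa's constructor/destructor ...
  }
#endif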
3282 class CMSPhaseAccounting: public StackObj { 3283 public: 3284 CMSPhaseAccounting(CMSCollector *collector, 3285 const char *phase, 3286 bool print_cr = true); 3287 ~CMSPhaseAccounting(); 3288 3289 private: 3290 CMSCollector *_collector; 3291 const char *_phase; 3292 elapsedTimer _wallclock; 3293 bool _print_cr; 3294 3295 public: 3296 // Not MT-safe; so do not pass around these StackObj's 3297 // where they may be accessed by other threads. 3298 jlong wallclock_millis() { 3299 assert(_wallclock.is_active(), "Wall clock should not stop"); 3300 _wallclock.stop(); // to record time 3301 jlong ret = _wallclock.milliseconds(); 3302 _wallclock.start(); // restart 3303 return ret; 3304 } 3305 }; 3306 3307 CMSPhaseAccounting::CMSPhaseAccounting(CMSCollector *collector, 3308 const char *phase, 3309 bool print_cr) : 3310 _collector(collector), _phase(phase), _print_cr(print_cr) { 3311 3312 if (PrintCMSStatistics != 0) { 3313 _collector->resetYields(); 3314 } 3315 if (PrintGCDetails && PrintGCTimeStamps) { 3316 gclog_or_tty->date_stamp(PrintGCDateStamps); 3317 gclog_or_tty->stamp(); 3318 gclog_or_tty->print_cr(": [%s-concurrent-%s-start]", 3319 _collector->cmsGen()->short_name(), _phase); 3320 } 3321 _collector->resetTimer(); 3322 _wallclock.start(); 3323 _collector->startTimer(); 3324 } 3325 3326 CMSPhaseAccounting::~CMSPhaseAccounting() { 3327 assert(_wallclock.is_active(), "Wall clock should not have stopped"); 3328 _collector->stopTimer(); 3329 _wallclock.stop(); 3330 if (PrintGCDetails) { 3331 gclog_or_tty->date_stamp(PrintGCDateStamps); 3332 if (PrintGCTimeStamps) { 3333 gclog_or_tty->stamp(); 3334 gclog_or_tty->print(": "); 3335 } 3336 gclog_or_tty->print("[%s-concurrent-%s: %3.3f/%3.3f secs]", 3337 _collector->cmsGen()->short_name(), 3338 _phase, _collector->timerValue(), _wallclock.seconds()); 3339 if (_print_cr) { 3340 gclog_or_tty->print_cr(""); 3341 } 3342 if (PrintCMSStatistics != 0) { 3343 gclog_or_tty->print_cr(" (CMS-concurrent-%s yielded %d times)", _phase, 3344 _collector->yields()); 3345 } 3346 } 3347 } 3348 3349 // CMS work 3350 3351 // Checkpoint the roots into this generation from outside 3352 // this generation. [Note this initial checkpoint need only 3353 // be approximate -- we'll do a catch up phase subsequently.] 3354 void CMSCollector::checkpointRootsInitial(bool asynch) { 3355 assert(_collectorState == InitialMarking, "Wrong collector state"); 3356 check_correct_thread_executing(); 3357 ReferenceProcessor* rp = ref_processor(); 3358 SpecializationStats::clear(); 3359 assert(_restart_addr == NULL, "Control point invariant"); 3360 if (asynch) { 3361 // acquire locks for subsequent manipulations 3362 MutexLockerEx x(bitMapLock(), 3363 Mutex::_no_safepoint_check_flag); 3364 checkpointRootsInitialWork(asynch); 3365 rp->verify_no_references_recorded(); 3366 rp->enable_discovery(); // enable ("weak") refs discovery 3367 _collectorState = Marking; 3368 } else { 3369 // (Weak) Refs discovery: this is controlled from genCollectedHeap::do_collection 3370 // which recognizes if we are a CMS generation, and doesn't try to turn on 3371 // discovery; verify that they aren't meddling. 
3372 assert(!rp->discovery_is_atomic(), 3373 "incorrect setting of discovery predicate"); 3374 assert(!rp->discovery_enabled(), "genCollectedHeap shouldn't control " 3375 "ref discovery for this generation kind"); 3376 // already have locks 3377 checkpointRootsInitialWork(asynch); 3378 rp->enable_discovery(); // now enable ("weak") refs discovery 3379 _collectorState = Marking; 3380 } 3381 SpecializationStats::print(); 3382 } 3383 3384 void CMSCollector::checkpointRootsInitialWork(bool asynch) { 3385 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); 3386 assert(_collectorState == InitialMarking, "just checking"); 3387 3388 // If there has not been a GC[n-1] since last GC[n] cycle completed, 3389 // precede our marking with a collection of all 3390 // younger generations to keep floating garbage to a minimum. 3391 // XXX: we won't do this for now -- it's an optimization to be done later. 3392 3393 // already have locks 3394 assert_lock_strong(bitMapLock()); 3395 assert(_markBitMap.isAllClear(), "was reset at end of previous cycle"); 3396 3397 // Setup the verification and class unloading state for this 3398 // CMS collection cycle. 3399 setup_cms_unloading_and_verification_state(); 3400 3401 NOT_PRODUCT(TraceTime t("\ncheckpointRootsInitialWork", 3402 PrintGCDetails && Verbose, true, gclog_or_tty);) 3403 if (UseAdaptiveSizePolicy) { 3404 size_policy()->checkpoint_roots_initial_begin(); 3405 } 3406 3407 // Reset all the PLAB chunk arrays if necessary. 3408 if (_survivor_plab_array != NULL && !CMSPLABRecordAlways) { 3409 reset_survivor_plab_arrays(); 3410 } 3411 3412 ResourceMark rm; 3413 HandleMark hm; 3414 3415 FalseClosure falseClosure; 3416 // In the case of a synchronous collection, we will elide the 3417 // remark step, so it's important to catch all the nmethod oops 3418 // in this step; hence the last argument to the constrcutor below. 3419 MarkRefsIntoClosure notOlder(_span, &_markBitMap, !asynch /* nmethods */); 3420 GenCollectedHeap* gch = GenCollectedHeap::heap(); 3421 3422 verify_work_stacks_empty(); 3423 verify_overflow_empty(); 3424 3425 gch->ensure_parsability(false); // fill TLABs, but no need to retire them 3426 // Update the saved marks which may affect the root scans. 3427 gch->save_marks(); 3428 3429 // weak reference processing has not started yet. 3430 ref_processor()->set_enqueuing_is_done(false); 3431 3432 { 3433 COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;) 3434 gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel. 3435 gch->gen_process_strong_roots(_cmsGen->level(), 3436 true, // younger gens are roots 3437 true, // collecting perm gen 3438 SharedHeap::ScanningOption(roots_scanning_options()), 3439 NULL, ¬Older); 3440 } 3441 3442 // Clear mod-union table; it will be dirtied in the prologue of 3443 // CMS generation per each younger generation collection. 3444 3445 assert(_modUnionTable.isAllClear(), 3446 "Was cleared in most recent final checkpoint phase" 3447 " or no bits are set in the gc_prologue before the start of the next " 3448 "subsequent marking phase."); 3449 3450 // Temporarily disabled, since pre/post-consumption closures don't 3451 // care about precleaned cards 3452 #if 0 3453 { 3454 MemRegion mr = MemRegion((HeapWord*)_virtual_space.low(), 3455 (HeapWord*)_virtual_space.high()); 3456 _ct->ct_bs()->preclean_dirty_cards(mr); 3457 } 3458 #endif 3459 3460 // Save the end of the used_region of the constituent generations 3461 // to be used to limit the extent of sweep in each generation. 
3462 save_sweep_limits(); 3463 if (UseAdaptiveSizePolicy) { 3464 size_policy()->checkpoint_roots_initial_end(gch->gc_cause()); 3465 } 3466 verify_overflow_empty(); 3467 } 3468 3469 bool CMSCollector::markFromRoots(bool asynch) { 3470 // we might be tempted to assert that: 3471 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 3472 // "inconsistent argument?"); 3473 // However that wouldn't be right, because it's possible that 3474 // a safepoint is indeed in progress as a younger generation 3475 // stop-the-world GC happens even as we mark in this generation. 3476 assert(_collectorState == Marking, "inconsistent state?"); 3477 check_correct_thread_executing(); 3478 verify_overflow_empty(); 3479 3480 bool res; 3481 if (asynch) { 3482 3483 // Start the timers for adaptive size policy for the concurrent phases 3484 // Do it here so that the foreground MS can use the concurrent 3485 // timer since a foreground MS might has the sweep done concurrently 3486 // or STW. 3487 if (UseAdaptiveSizePolicy) { 3488 size_policy()->concurrent_marking_begin(); 3489 } 3490 3491 // Weak ref discovery note: We may be discovering weak 3492 // refs in this generation concurrent (but interleaved) with 3493 // weak ref discovery by a younger generation collector. 3494 3495 CMSTokenSyncWithLocks ts(true, bitMapLock()); 3496 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); 3497 CMSPhaseAccounting pa(this, "mark", !PrintGCDetails); 3498 res = markFromRootsWork(asynch); 3499 if (res) { 3500 _collectorState = Precleaning; 3501 } else { // We failed and a foreground collection wants to take over 3502 assert(_foregroundGCIsActive, "internal state inconsistency"); 3503 assert(_restart_addr == NULL, "foreground will restart from scratch"); 3504 if (PrintGCDetails) { 3505 gclog_or_tty->print_cr("bailing out to foreground collection"); 3506 } 3507 } 3508 if (UseAdaptiveSizePolicy) { 3509 size_policy()->concurrent_marking_end(); 3510 } 3511 } else { 3512 assert(SafepointSynchronize::is_at_safepoint(), 3513 "inconsistent with asynch == false"); 3514 if (UseAdaptiveSizePolicy) { 3515 size_policy()->ms_collection_marking_begin(); 3516 } 3517 // already have locks 3518 res = markFromRootsWork(asynch); 3519 _collectorState = FinalMarking; 3520 if (UseAdaptiveSizePolicy) { 3521 GenCollectedHeap* gch = GenCollectedHeap::heap(); 3522 size_policy()->ms_collection_marking_end(gch->gc_cause()); 3523 } 3524 } 3525 verify_overflow_empty(); 3526 return res; 3527 } 3528 3529 bool CMSCollector::markFromRootsWork(bool asynch) { 3530 // iterate over marked bits in bit map, doing a full scan and mark 3531 // from these roots using the following algorithm: 3532 // . if oop is to the right of the current scan pointer, 3533 // mark corresponding bit (we'll process it later) 3534 // . else (oop is to left of current scan pointer) 3535 // push oop on marking stack 3536 // . drain the marking stack 3537 3538 // Note that when we do a marking step we need to hold the 3539 // bit map lock -- recall that direct allocation (by mutators) 3540 // and promotion (by younger generation collectors) is also 3541 // marking the bit map. [the so-called allocate live policy.] 3542 // Because the implementation of bit map marking is not 3543 // robust wrt simultaneous marking of bits in the same word, 3544 // we need to make sure that there is no such interference 3545 // between concurrent such updates. 
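  // The closure that implements the scan described above (MarkFromRootsClosure)
  // lives elsewhere; the following is a minimal, self-contained model of the
  // finger discipline (plain C++, illustrative only, not compiled here and not
  // HotSpot code). Objects are modeled as indices in address order, "marked"
  // plays the role of the bit map, and the initially set bits are the roots
  // marked one level up during the initial checkpoint.
#if 0
#include <stack>
#include <vector>

typedef std::vector<std::vector<size_t> > RefGraph;   // refs[i] = objects referenced by i

static void mark_ref(size_t target, size_t finger,
                     std::vector<bool>& marked, std::stack<size_t>& work) {
  if (marked[target]) return;
  marked[target] = true;                  // now grey
  if (target < finger) work.push(target); // scan pointer already passed it: remember it
  // else: the main loop below will reach the newly set bit later
}

static void mark_from_roots_model(const RefGraph& refs, std::vector<bool>& marked) {
  std::stack<size_t> work;
  for (size_t finger = 0; finger < refs.size(); ++finger) {
    if (!marked[finger]) continue;        // scan only objects whose bit is set
    for (size_t j = 0; j < refs[finger].size(); ++j)
      mark_ref(refs[finger][j], finger, marked, work);
    while (!work.empty()) {               // drain the stack before advancing the finger
      size_t obj = work.top(); work.pop();
      for (size_t j = 0; j < refs[obj].size(); ++j)
        mark_ref(refs[obj][j], finger, marked, work);
    }
  }
}
#endif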
3546 3547 // already have locks 3548 assert_lock_strong(bitMapLock()); 3549 3550 // Clear the revisit stack, just in case there are any 3551 // obsolete contents from a short-circuited previous CMS cycle. 3552 _revisitStack.reset(); 3553 verify_work_stacks_empty(); 3554 verify_overflow_empty(); 3555 assert(_revisitStack.isEmpty(), "tabula rasa"); 3556 3557 bool result = false; 3558 if (CMSConcurrentMTEnabled && ParallelCMSThreads > 0) { 3559 result = do_marking_mt(asynch); 3560 } else { 3561 result = do_marking_st(asynch); 3562 } 3563 return result; 3564 } 3565 3566 // Forward decl 3567 class CMSConcMarkingTask; 3568 3569 class CMSConcMarkingTerminator: public ParallelTaskTerminator { 3570 CMSCollector* _collector; 3571 CMSConcMarkingTask* _task; 3572 bool _yield; 3573 protected: 3574 virtual void yield(); 3575 public: 3576 // "n_threads" is the number of threads to be terminated. 3577 // "queue_set" is a set of work queues of other threads. 3578 // "collector" is the CMS collector associated with this task terminator. 3579 // "yield" indicates whether we need the gang as a whole to yield. 3580 CMSConcMarkingTerminator(int n_threads, TaskQueueSetSuper* queue_set, 3581 CMSCollector* collector, bool yield) : 3582 ParallelTaskTerminator(n_threads, queue_set), 3583 _collector(collector), 3584 _yield(yield) { } 3585 3586 void set_task(CMSConcMarkingTask* task) { 3587 _task = task; 3588 } 3589 }; 3590 3591 // MT Concurrent Marking Task 3592 class CMSConcMarkingTask: public YieldingFlexibleGangTask { 3593 CMSCollector* _collector; 3594 YieldingFlexibleWorkGang* _workers; // the whole gang 3595 int _n_workers; // requested/desired # workers 3596 bool _asynch; 3597 bool _result; 3598 CompactibleFreeListSpace* _cms_space; 3599 CompactibleFreeListSpace* _perm_space; 3600 HeapWord* _global_finger; 3601 HeapWord* _restart_addr; 3602 3603 // Exposed here for yielding support 3604 Mutex* const _bit_map_lock; 3605 3606 // The per thread work queues, available here for stealing 3607 OopTaskQueueSet* _task_queues; 3608 CMSConcMarkingTerminator _term; 3609 3610 public: 3611 CMSConcMarkingTask(CMSCollector* collector, 3612 CompactibleFreeListSpace* cms_space, 3613 CompactibleFreeListSpace* perm_space, 3614 bool asynch, int n_workers, 3615 YieldingFlexibleWorkGang* workers, 3616 OopTaskQueueSet* task_queues): 3617 YieldingFlexibleGangTask("Concurrent marking done multi-threaded"), 3618 _collector(collector), 3619 _cms_space(cms_space), 3620 _perm_space(perm_space), 3621 _asynch(asynch), _n_workers(n_workers), _result(true), 3622 _workers(workers), _task_queues(task_queues), 3623 _term(n_workers, task_queues, _collector, asynch), 3624 _bit_map_lock(collector->bitMapLock()) 3625 { 3626 assert(n_workers <= workers->total_workers(), 3627 "Else termination won't work correctly today"); // XXX FIX ME! 
3628 _requested_size = n_workers; 3629 _term.set_task(this); 3630 assert(_cms_space->bottom() < _perm_space->bottom(), 3631 "Finger incorrectly initialized below"); 3632 _restart_addr = _global_finger = _cms_space->bottom(); 3633 } 3634 3635 3636 OopTaskQueueSet* task_queues() { return _task_queues; } 3637 3638 OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); } 3639 3640 HeapWord** global_finger_addr() { return &_global_finger; } 3641 3642 CMSConcMarkingTerminator* terminator() { return &_term; } 3643 3644 void work(int i); 3645 3646 virtual void coordinator_yield(); // stuff done by coordinator 3647 bool result() { return _result; } 3648 3649 void reset(HeapWord* ra) { 3650 assert(_global_finger >= _cms_space->end(), "Postcondition of ::work(i)"); 3651 assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)"); 3652 assert(ra < _perm_space->end(), "ra too large"); 3653 _restart_addr = _global_finger = ra; 3654 _term.reset_for_reuse(); 3655 } 3656 3657 static bool get_work_from_overflow_stack(CMSMarkStack* ovflw_stk, 3658 OopTaskQueue* work_q); 3659 3660 private: 3661 void do_scan_and_mark(int i, CompactibleFreeListSpace* sp); 3662 void do_work_steal(int i); 3663 void bump_global_finger(HeapWord* f); 3664 }; 3665 3666 void CMSConcMarkingTerminator::yield() { 3667 if (ConcurrentMarkSweepThread::should_yield() && 3668 !_collector->foregroundGCIsActive() && 3669 _yield) { 3670 _task->yield(); 3671 } else { 3672 ParallelTaskTerminator::yield(); 3673 } 3674 } 3675 3676 //////////////////////////////////////////////////////////////// 3677 // Concurrent Marking Algorithm Sketch 3678 //////////////////////////////////////////////////////////////// 3679 // Until all tasks exhausted (both spaces): 3680 // -- claim next available chunk 3681 // -- bump global finger via CAS 3682 // -- find first object that starts in this chunk 3683 // and start scanning bitmap from that position 3684 // -- scan marked objects for oops 3685 // -- CAS-mark target, and if successful: 3686 // . if target oop is above global finger (volatile read) 3687 // nothing to do 3688 // . if target oop is in chunk and above local finger 3689 // then nothing to do 3690 // . else push on work-queue 3691 // -- Deal with possible overflow issues: 3692 // . local work-queue overflow causes stuff to be pushed on 3693 // global (common) overflow queue 3694 // . always first empty local work queue 3695 // . then get a batch of oops from global work queue if any 3696 // . then do work stealing 3697 // -- When all tasks claimed (both spaces) 3698 // and local work queue empty, 3699 // then in a loop do: 3700 // . check global overflow stack; steal a batch of oops and trace 3701 // . try to steal from other threads oif GOS is empty 3702 // . if neither is available, offer termination 3703 // -- Terminate and return result 3704 // 3705 void CMSConcMarkingTask::work(int i) { 3706 elapsedTimer _timer; 3707 ResourceMark rm; 3708 HandleMark hm; 3709 3710 DEBUG_ONLY(_collector->verify_overflow_empty();) 3711 3712 // Before we begin work, our work queue should be empty 3713 assert(work_queue(i)->size() == 0, "Expected to be empty"); 3714 // Scan the bitmap covering _cms_space, tracing through grey objects. 3715 _timer.start(); 3716 do_scan_and_mark(i, _cms_space); 3717 _timer.stop(); 3718 if (PrintCMSStatistics != 0) { 3719 gclog_or_tty->print_cr("Finished cms space scanning in %dth thread: %3.3f sec", 3720 i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers 3721 } 3722 3723 // ... 
do the same for the _perm_space 3724 _timer.reset(); 3725 _timer.start(); 3726 do_scan_and_mark(i, _perm_space); 3727 _timer.stop(); 3728 if (PrintCMSStatistics != 0) { 3729 gclog_or_tty->print_cr("Finished perm space scanning in %dth thread: %3.3f sec", 3730 i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers 3731 } 3732 3733 // ... do work stealing 3734 _timer.reset(); 3735 _timer.start(); 3736 do_work_steal(i); 3737 _timer.stop(); 3738 if (PrintCMSStatistics != 0) { 3739 gclog_or_tty->print_cr("Finished work stealing in %dth thread: %3.3f sec", 3740 i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers 3741 } 3742 assert(_collector->_markStack.isEmpty(), "Should have been emptied"); 3743 assert(work_queue(i)->size() == 0, "Should have been emptied"); 3744 // Note that under the current task protocol, the 3745 // following assertion is true even of the spaces 3746 // expanded since the completion of the concurrent 3747 // marking. XXX This will likely change under a strict 3748 // ABORT semantics. 3749 assert(_global_finger > _cms_space->end() && 3750 _global_finger >= _perm_space->end(), 3751 "All tasks have been completed"); 3752 DEBUG_ONLY(_collector->verify_overflow_empty();) 3753 } 3754 3755 void CMSConcMarkingTask::bump_global_finger(HeapWord* f) { 3756 HeapWord* read = _global_finger; 3757 HeapWord* cur = read; 3758 while (f > read) { 3759 cur = read; 3760 read = (HeapWord*) Atomic::cmpxchg_ptr(f, &_global_finger, cur); 3761 if (cur == read) { 3762 // our cas succeeded 3763 assert(_global_finger >= f, "protocol consistency"); 3764 break; 3765 } 3766 } 3767 } 3768 3769 // This is really inefficient, and should be redone by 3770 // using (not yet available) block-read and -write interfaces to the 3771 // stack and the work_queue. XXX FIX ME !!! 3772 bool CMSConcMarkingTask::get_work_from_overflow_stack(CMSMarkStack* ovflw_stk, 3773 OopTaskQueue* work_q) { 3774 // Fast lock-free check 3775 if (ovflw_stk->length() == 0) { 3776 return false; 3777 } 3778 assert(work_q->size() == 0, "Shouldn't steal"); 3779 MutexLockerEx ml(ovflw_stk->par_lock(), 3780 Mutex::_no_safepoint_check_flag); 3781 // Grab up to 1/4 the size of the work queue 3782 size_t num = MIN2((size_t)work_q->max_elems()/4, 3783 (size_t)ParGCDesiredObjsFromOverflowList); 3784 num = MIN2(num, ovflw_stk->length()); 3785 for (int i = (int) num; i > 0; i--) { 3786 oop cur = ovflw_stk->pop(); 3787 assert(cur != NULL, "Counted wrong?"); 3788 work_q->push(cur); 3789 } 3790 return num > 0; 3791 } 3792 3793 void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) { 3794 SequentialSubTasksDone* pst = sp->conc_par_seq_tasks(); 3795 int n_tasks = pst->n_tasks(); 3796 // We allow that there may be no tasks to do here because 3797 // we are restarting after a stack overflow. 3798 assert(pst->valid() || n_tasks == 0, "Uninitialized use?"); 3799 int nth_task = 0; 3800 3801 HeapWord* aligned_start = sp->bottom(); 3802 if (sp->used_region().contains(_restart_addr)) { 3803 // Align down to a card boundary for the start of 0th task 3804 // for this space. 
3805 aligned_start = 3806 (HeapWord*)align_size_down((uintptr_t)_restart_addr, 3807 CardTableModRefBS::card_size); 3808 } 3809 3810 size_t chunk_size = sp->marking_task_size(); 3811 while (!pst->is_task_claimed(/* reference */ nth_task)) { 3812 // Having claimed the nth task in this space, 3813 // compute the chunk that it corresponds to: 3814 MemRegion span = MemRegion(aligned_start + nth_task*chunk_size, 3815 aligned_start + (nth_task+1)*chunk_size); 3816 // Try and bump the global finger via a CAS; 3817 // note that we need to do the global finger bump 3818 // _before_ taking the intersection below, because 3819 // the task corresponding to that region will be 3820 // deemed done even if the used_region() expands 3821 // because of allocation -- as it almost certainly will 3822 // during start-up while the threads yield in the 3823 // closure below. 3824 HeapWord* finger = span.end(); 3825 bump_global_finger(finger); // atomically 3826 // There are null tasks here corresponding to chunks 3827 // beyond the "top" address of the space. 3828 span = span.intersection(sp->used_region()); 3829 if (!span.is_empty()) { // Non-null task 3830 HeapWord* prev_obj; 3831 assert(!span.contains(_restart_addr) || nth_task == 0, 3832 "Inconsistency"); 3833 if (nth_task == 0) { 3834 // For the 0th task, we'll not need to compute a block_start. 3835 if (span.contains(_restart_addr)) { 3836 // In the case of a restart because of stack overflow, 3837 // we might additionally skip a chunk prefix. 3838 prev_obj = _restart_addr; 3839 } else { 3840 prev_obj = span.start(); 3841 } 3842 } else { 3843 // We want to skip the first object because 3844 // the protocol is to scan any object in its entirety 3845 // that _starts_ in this span; a fortiori, any 3846 // object starting in an earlier span is scanned 3847 // as part of an earlier claimed task. 3848 // Below we use the "careful" version of block_start 3849 // so we do not try to navigate uninitialized objects. 3850 prev_obj = sp->block_start_careful(span.start()); 3851 // Below we use a variant of block_size that uses the 3852 // Printezis bits to avoid waiting for allocated 3853 // objects to become initialized/parsable. 3854 while (prev_obj < span.start()) { 3855 size_t sz = sp->block_size_no_stall(prev_obj, _collector); 3856 if (sz > 0) { 3857 prev_obj += sz; 3858 } else { 3859 // In this case we may end up doing a bit of redundant 3860 // scanning, but that appears unavoidable, short of 3861 // locking the free list locks; see bug 6324141. 3862 break; 3863 } 3864 } 3865 } 3866 if (prev_obj < span.end()) { 3867 MemRegion my_span = MemRegion(prev_obj, span.end()); 3868 // Do the marking work within a non-empty span -- 3869 // the last argument to the constructor indicates whether the 3870 // iteration should be incremental with periodic yields. 3871 Par_MarkFromRootsClosure cl(this, _collector, my_span, 3872 &_collector->_markBitMap, 3873 work_queue(i), 3874 &_collector->_markStack, 3875 &_collector->_revisitStack, 3876 _asynch); 3877 _collector->_markBitMap.iterate(&cl, my_span.start(), my_span.end()); 3878 } // else nothing to do for this task 3879 } // else nothing to do for this task 3880 } 3881 // We'd be tempted to assert here that since there are no 3882 // more tasks left to claim in this space, the global_finger 3883 // must exceed space->top() and a fortiori space->end(). 
However, 3884 // that would not quite be correct because the bumping of 3885 // global_finger occurs strictly after the claiming of a task, 3886 // so by the time we reach here the global finger may not yet 3887 // have been bumped up by the thread that claimed the last 3888 // task. 3889 pst->all_tasks_completed(); 3890 } 3891 3892 class Par_ConcMarkingClosure: public OopClosure { 3893 CMSCollector* _collector; 3894 MemRegion _span; 3895 CMSBitMap* _bit_map; 3896 CMSMarkStack* _overflow_stack; 3897 CMSMarkStack* _revisit_stack; // XXXXXX Check proper use 3898 OopTaskQueue* _work_queue; 3899 3900 public: 3901 Par_ConcMarkingClosure(CMSCollector* collector, OopTaskQueue* work_queue, 3902 CMSBitMap* bit_map, CMSMarkStack* overflow_stack): 3903 _collector(collector), 3904 _span(_collector->_span), 3905 _work_queue(work_queue), 3906 _bit_map(bit_map), 3907 _overflow_stack(overflow_stack) { } // need to initialize revisit stack etc. 3908 3909 void do_oop(oop* p); 3910 void trim_queue(size_t max); 3911 void handle_stack_overflow(HeapWord* lost); 3912 }; 3913 3914 // Grey object scanning during work stealing phase -- 3915 // the salient assumption here is that any references 3916 // that are in these stolen objects being scanned must 3917 // already have been initialized (else they would not have 3918 // been published), so we do not need to check for 3919 // uninitialized objects before pushing here. 3920 void Par_ConcMarkingClosure::do_oop(oop* p) { 3921 oop this_oop = *p; 3922 assert(this_oop->is_oop_or_null(true), 3923 "expected an oop or NULL"); 3924 HeapWord* addr = (HeapWord*)this_oop; 3925 // Check if oop points into the CMS generation 3926 // and is not marked 3927 if (_span.contains(addr) && !_bit_map->isMarked(addr)) { 3928 // a white object ... 3929 // If we manage to "claim" the object, by being the 3930 // first thread to mark it, then we push it on our 3931 // marking stack 3932 if (_bit_map->par_mark(addr)) { // ... now grey 3933 // push on work queue (grey set) 3934 bool simulate_overflow = false; 3935 NOT_PRODUCT( 3936 if (CMSMarkStackOverflowALot && 3937 _collector->simulate_overflow()) { 3938 // simulate a stack overflow 3939 simulate_overflow = true; 3940 } 3941 ) 3942 if (simulate_overflow || 3943 !(_work_queue->push(this_oop) || _overflow_stack->par_push(this_oop))) { 3944 // stack overflow 3945 if (PrintCMSStatistics != 0) { 3946 gclog_or_tty->print_cr("CMS marking stack overflow (benign) at " 3947 SIZE_FORMAT, _overflow_stack->capacity()); 3948 } 3949 // We cannot assert that the overflow stack is full because 3950 // it may have been emptied since. 3951 assert(simulate_overflow || 3952 _work_queue->size() == _work_queue->max_elems(), 3953 "Else push should have succeeded"); 3954 handle_stack_overflow(addr); 3955 } 3956 } // Else, some other thread got there first 3957 } 3958 } 3959 3960 void Par_ConcMarkingClosure::trim_queue(size_t max) { 3961 while (_work_queue->size() > max) { 3962 oop new_oop; 3963 if (_work_queue->pop_local(new_oop)) { 3964 assert(new_oop->is_oop(), "Should be an oop"); 3965 assert(_bit_map->isMarked((HeapWord*)new_oop), "Grey object"); 3966 assert(_span.contains((HeapWord*)new_oop), "Not in span"); 3967 assert(new_oop->is_parsable(), "Should be parsable"); 3968 new_oop->oop_iterate(this); // do_oop() above 3969 } 3970 } 3971 } 3972 3973 // Upon stack overflow, we discard (part of) the stack, 3974 // remembering the least address amongst those discarded 3975 // in CMSCollector's _restart_address. 
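// Illustrative sketch (not compiled) of how the address recorded by
// handle_stack_overflow() below is consumed: the marking loops further down
// (do_marking_st() and do_marking_mt()) keep re-running the bit map iteration
// from _restart_addr until no further overflow occurs, roughly:
#if 0
  while (_restart_addr != NULL) {
    HeapWord* ra = _restart_addr;
    _restart_addr = NULL;        // may be lowered again by a fresh overflow
    rescan_from(ra);             // hypothetical stand-in for the bit map iteration
  }
#endif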
3976 void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) { 3977 // We need to do this under a mutex to prevent other 3978 // workers from interfering with the work done below. 3979 MutexLockerEx ml(_overflow_stack->par_lock(), 3980 Mutex::_no_safepoint_check_flag); 3981 // Remember the least grey address discarded 3982 HeapWord* ra = (HeapWord*)_overflow_stack->least_value(lost); 3983 _collector->lower_restart_addr(ra); 3984 _overflow_stack->reset(); // discard stack contents 3985 _overflow_stack->expand(); // expand the stack if possible 3986 } 3987 3988 3989 void CMSConcMarkingTask::do_work_steal(int i) { 3990 OopTaskQueue* work_q = work_queue(i); 3991 oop obj_to_scan; 3992 CMSBitMap* bm = &(_collector->_markBitMap); 3993 CMSMarkStack* ovflw = &(_collector->_markStack); 3994 int* seed = _collector->hash_seed(i); 3995 Par_ConcMarkingClosure cl(_collector, work_q, bm, ovflw); 3996 while (true) { 3997 cl.trim_queue(0); 3998 assert(work_q->size() == 0, "Should have been emptied above"); 3999 if (get_work_from_overflow_stack(ovflw, work_q)) { 4000 // Can't assert below because the work obtained from the 4001 // overflow stack may already have been stolen from us. 4002 // assert(work_q->size() > 0, "Work from overflow stack"); 4003 continue; 4004 } else if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) { 4005 assert(obj_to_scan->is_oop(), "Should be an oop"); 4006 assert(bm->isMarked((HeapWord*)obj_to_scan), "Grey object"); 4007 obj_to_scan->oop_iterate(&cl); 4008 } else if (terminator()->offer_termination()) { 4009 assert(work_q->size() == 0, "Impossible!"); 4010 break; 4011 } 4012 } 4013 } 4014 4015 // This is run by the CMS (coordinator) thread. 4016 void CMSConcMarkingTask::coordinator_yield() { 4017 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 4018 "CMS thread should hold CMS token"); 4019 4020 // First give up the locks, then yield, then re-lock 4021 // We should probably use a constructor/destructor idiom to 4022 // do this unlock/lock or modify the MutexUnlocker class to 4023 // serve our purpose. XXX 4024 assert_lock_strong(_bit_map_lock); 4025 _bit_map_lock->unlock(); 4026 ConcurrentMarkSweepThread::desynchronize(true); 4027 ConcurrentMarkSweepThread::acknowledge_yield_request(); 4028 _collector->stopTimer(); 4029 if (PrintCMSStatistics != 0) { 4030 _collector->incrementYields(); 4031 } 4032 _collector->icms_wait(); 4033 4034 // It is possible for whichever thread initiated the yield request 4035 // not to get a chance to wake up and take the bitmap lock between 4036 // this thread releasing it and reacquiring it. So, while the 4037 // should_yield() flag is on, let's sleep for a bit to give the 4038 // other thread a chance to wake up. The limit imposed on the number 4039 // of iterations is defensive, to avoid any unforeseen circumstances 4040 // putting us into an infinite loop. Since it's always been this 4041 // (coordinator_yield()) method that was observed to cause the 4042 // problem, we are using a parameter (CMSCoordinatorYieldSleepCount) 4043 // which is by default non-zero. For the other seven methods that 4044 // also perform the yield operation, we are using a different 4045 // parameter (CMSYieldSleepCount) which is by default zero. This way we 4046 // can enable the sleeping for those methods too, if necessary. 4047 // See 6442774.
4048 // 4049 // We really need to reconsider the synchronization between the GC 4050 // thread and the yield-requesting threads in the future and we 4051 // should really use wait/notify, which is the recommended 4052 // way of doing this type of interaction. Additionally, we should 4053 // consolidate the eight methods that do the yield operation, which are 4054 // almost identical, into one for better maintainability and 4055 // readability. See 6445193. 4056 // 4057 // Tony 2006.06.29 4058 for (unsigned i = 0; i < CMSCoordinatorYieldSleepCount && 4059 ConcurrentMarkSweepThread::should_yield() && 4060 !CMSCollector::foregroundGCIsActive(); ++i) { 4061 os::sleep(Thread::current(), 1, false); 4062 ConcurrentMarkSweepThread::acknowledge_yield_request(); 4063 } 4064 4065 ConcurrentMarkSweepThread::synchronize(true); 4066 _bit_map_lock->lock_without_safepoint_check(); 4067 _collector->startTimer(); 4068 } 4069 4070 bool CMSCollector::do_marking_mt(bool asynch) { 4071 assert(ParallelCMSThreads > 0 && conc_workers() != NULL, "precondition"); 4072 // In the future this would be determined ergonomically, based 4073 // on #cpu's, # active mutator threads (and load), and mutation rate. 4074 int num_workers = ParallelCMSThreads; 4075 4076 CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); 4077 CompactibleFreeListSpace* perm_space = _permGen->cmsSpace(); 4078 4079 CMSConcMarkingTask tsk(this, cms_space, perm_space, 4080 asynch, num_workers /* number requested XXX */, 4081 conc_workers(), task_queues()); 4082 4083 // Since the actual number of workers we get may be different 4084 // from the number we requested above, do we need to do anything different 4085 // below? In particular, maybe we need to subclass the SequentialSubTasksDone 4086 // class?? XXX 4087 cms_space ->initialize_sequential_subtasks_for_marking(num_workers); 4088 perm_space->initialize_sequential_subtasks_for_marking(num_workers); 4089 4090 // Refs discovery is already non-atomic. 4091 assert(!ref_processor()->discovery_is_atomic(), "Should be non-atomic"); 4092 // Mutate the Refs discovery so it is MT during the 4093 // multi-threaded marking phase. 4094 ReferenceProcessorMTMutator mt(ref_processor(), num_workers > 1); 4095 4096 conc_workers()->start_task(&tsk); 4097 while (tsk.yielded()) { 4098 tsk.coordinator_yield(); 4099 conc_workers()->continue_task(&tsk); 4100 } 4101 // If the task was aborted, _restart_addr will be non-NULL 4102 assert(tsk.completed() || _restart_addr != NULL, "Inconsistency"); 4103 while (_restart_addr != NULL) { 4104 // XXX For now we do not make use of ABORTED state and have not 4105 // yet implemented the right abort semantics (even in the original 4106 // single-threaded CMS case). That needs some more investigation 4107 // and is deferred for now; see CR# TBF. 07252005YSR. XXX 4108 assert(!CMSAbortSemantics || tsk.aborted(), "Inconsistency"); 4109 // If _restart_addr is non-NULL, a marking stack overflow 4110 // occurred; we need to do a fresh marking iteration from the 4111 // indicated restart address. 4112 if (_foregroundGCIsActive && asynch) { 4113 // We may be running into repeated stack overflows, having 4114 // reached the limit of the stack size, while making very 4115 // slow forward progress. It may be best to bail out and 4116 // let the foreground collector do its job. 4117 // Clear _restart_addr, so that foreground GC 4118 // works from scratch. This avoids the headache of 4119 // a "rescan" which would otherwise be needed because 4120 // of the dirty mod union table & card table.
4121 _restart_addr = NULL; 4122 return false; 4123 } 4124 // Adjust the task to restart from _restart_addr 4125 tsk.reset(_restart_addr); 4126 cms_space ->initialize_sequential_subtasks_for_marking(num_workers, 4127 _restart_addr); 4128 perm_space->initialize_sequential_subtasks_for_marking(num_workers, 4129 _restart_addr); 4130 _restart_addr = NULL; 4131 // Get the workers going again 4132 conc_workers()->start_task(&tsk); 4133 while (tsk.yielded()) { 4134 tsk.coordinator_yield(); 4135 conc_workers()->continue_task(&tsk); 4136 } 4137 } 4138 assert(tsk.completed(), "Inconsistency"); 4139 assert(tsk.result() == true, "Inconsistency"); 4140 return true; 4141 } 4142 4143 bool CMSCollector::do_marking_st(bool asynch) { 4144 ResourceMark rm; 4145 HandleMark hm; 4146 4147 MarkFromRootsClosure markFromRootsClosure(this, _span, &_markBitMap, 4148 &_markStack, &_revisitStack, CMSYield && asynch); 4149 // the last argument to iterate indicates whether the iteration 4150 // should be incremental with periodic yields. 4151 _markBitMap.iterate(&markFromRootsClosure); 4152 // If _restart_addr is non-NULL, a marking stack overflow 4153 // occured; we need to do a fresh iteration from the 4154 // indicated restart address. 4155 while (_restart_addr != NULL) { 4156 if (_foregroundGCIsActive && asynch) { 4157 // We may be running into repeated stack overflows, having 4158 // reached the limit of the stack size, while making very 4159 // slow forward progress. It may be best to bail out and 4160 // let the foreground collector do its job. 4161 // Clear _restart_addr, so that foreground GC 4162 // works from scratch. This avoids the headache of 4163 // a "rescan" which would otherwise be needed because 4164 // of the dirty mod union table & card table. 4165 _restart_addr = NULL; 4166 return false; // indicating failure to complete marking 4167 } 4168 // Deal with stack overflow: 4169 // we restart marking from _restart_addr 4170 HeapWord* ra = _restart_addr; 4171 markFromRootsClosure.reset(ra); 4172 _restart_addr = NULL; 4173 _markBitMap.iterate(&markFromRootsClosure, ra, _span.end()); 4174 } 4175 return true; 4176 } 4177 4178 void CMSCollector::preclean() { 4179 check_correct_thread_executing(); 4180 assert(Thread::current()->is_ConcurrentGC_thread(), "Wrong thread"); 4181 verify_work_stacks_empty(); 4182 verify_overflow_empty(); 4183 _abort_preclean = false; 4184 if (CMSPrecleaningEnabled) { 4185 _eden_chunk_index = 0; 4186 size_t used = get_eden_used(); 4187 size_t capacity = get_eden_capacity(); 4188 // Don't start sampling unless we will get sufficiently 4189 // many samples. 4190 if (used < (capacity/(CMSScheduleRemarkSamplingRatio * 100) 4191 * CMSScheduleRemarkEdenPenetration)) { 4192 _start_sampling = true; 4193 } else { 4194 _start_sampling = false; 4195 } 4196 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); 4197 CMSPhaseAccounting pa(this, "preclean", !PrintGCDetails); 4198 preclean_work(CMSPrecleanRefLists1, CMSPrecleanSurvivors1); 4199 } 4200 CMSTokenSync x(true); // is cms thread 4201 if (CMSPrecleaningEnabled) { 4202 sample_eden(); 4203 _collectorState = AbortablePreclean; 4204 } else { 4205 _collectorState = FinalMarking; 4206 } 4207 verify_work_stacks_empty(); 4208 verify_overflow_empty(); 4209 } 4210 4211 // Try and schedule the remark such that young gen 4212 // occupancy is CMSScheduleRemarkEdenPenetration %. 
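// Illustrative arithmetic (example numbers only): with an Eden capacity of
// 64M, CMSScheduleRemarkEdenPenetration = 50 and
// CMSScheduleRemarkSamplingRatio = 5, sampling of Eden's top is started in
// preclean() above only while occupancy is still below
// 64M / (5 * 100) * 50, i.e. about 6.4M, so that enough samples accumulate
// before the target is reached; the remark itself is then aimed (via the
// check in sample_eden() below) at the point where Eden occupancy crosses
// 64M / 100 * 50 = 32M.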
4213 void CMSCollector::abortable_preclean() { 4214 check_correct_thread_executing(); 4215 assert(CMSPrecleaningEnabled, "Inconsistent control state"); 4216 assert(_collectorState == AbortablePreclean, "Inconsistent control state"); 4217 4218 // If Eden's current occupancy is below this threshold, 4219 // immediately schedule the remark; else preclean 4220 // past the next scavenge in an effort to 4221 // schedule the pause as described avove. By choosing 4222 // CMSScheduleRemarkEdenSizeThreshold >= max eden size 4223 // we will never do an actual abortable preclean cycle. 4224 if (get_eden_used() > CMSScheduleRemarkEdenSizeThreshold) { 4225 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); 4226 CMSPhaseAccounting pa(this, "abortable-preclean", !PrintGCDetails); 4227 // We need more smarts in the abortable preclean 4228 // loop below to deal with cases where allocation 4229 // in young gen is very very slow, and our precleaning 4230 // is running a losing race against a horde of 4231 // mutators intent on flooding us with CMS updates 4232 // (dirty cards). 4233 // One, admittedly dumb, strategy is to give up 4234 // after a certain number of abortable precleaning loops 4235 // or after a certain maximum time. We want to make 4236 // this smarter in the next iteration. 4237 // XXX FIX ME!!! YSR 4238 size_t loops = 0, workdone = 0, cumworkdone = 0, waited = 0; 4239 while (!(should_abort_preclean() || 4240 ConcurrentMarkSweepThread::should_terminate())) { 4241 workdone = preclean_work(CMSPrecleanRefLists2, CMSPrecleanSurvivors2); 4242 cumworkdone += workdone; 4243 loops++; 4244 // Voluntarily terminate abortable preclean phase if we have 4245 // been at it for too long. 4246 if ((CMSMaxAbortablePrecleanLoops != 0) && 4247 loops >= CMSMaxAbortablePrecleanLoops) { 4248 if (PrintGCDetails) { 4249 gclog_or_tty->print(" CMS: abort preclean due to loops "); 4250 } 4251 break; 4252 } 4253 if (pa.wallclock_millis() > CMSMaxAbortablePrecleanTime) { 4254 if (PrintGCDetails) { 4255 gclog_or_tty->print(" CMS: abort preclean due to time "); 4256 } 4257 break; 4258 } 4259 // If we are doing little work each iteration, we should 4260 // take a short break. 4261 if (workdone < CMSAbortablePrecleanMinWorkPerIteration) { 4262 // Sleep for some time, waiting for work to accumulate 4263 stopTimer(); 4264 cmsThread()->wait_on_cms_lock(CMSAbortablePrecleanWaitMillis); 4265 startTimer(); 4266 waited++; 4267 } 4268 } 4269 if (PrintCMSStatistics > 0) { 4270 gclog_or_tty->print(" [%d iterations, %d waits, %d cards)] ", 4271 loops, waited, cumworkdone); 4272 } 4273 } 4274 CMSTokenSync x(true); // is cms thread 4275 if (_collectorState != Idling) { 4276 assert(_collectorState == AbortablePreclean, 4277 "Spontaneous state transition?"); 4278 _collectorState = FinalMarking; 4279 } // Else, a foreground collection completed this CMS cycle. 4280 return; 4281 } 4282 4283 // Respond to an Eden sampling opportunity 4284 void CMSCollector::sample_eden() { 4285 // Make sure a young gc cannot sneak in between our 4286 // reading and recording of a sample. 
4287 assert(Thread::current()->is_ConcurrentGC_thread(), 4288 "Only the cms thread may collect Eden samples"); 4289 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 4290 "Should collect samples while holding CMS token"); 4291 if (!_start_sampling) { 4292 return; 4293 } 4294 if (_eden_chunk_array) { 4295 if (_eden_chunk_index < _eden_chunk_capacity) { 4296 _eden_chunk_array[_eden_chunk_index] = *_top_addr; // take sample 4297 assert(_eden_chunk_array[_eden_chunk_index] <= *_end_addr, 4298 "Unexpected state of Eden"); 4299 // We'd like to check that what we just sampled is an oop-start address; 4300 // however, we cannot do that here since the object may not yet have been 4301 // initialized. So we'll instead do the check when we _use_ this sample 4302 // later. 4303 if (_eden_chunk_index == 0 || 4304 (pointer_delta(_eden_chunk_array[_eden_chunk_index], 4305 _eden_chunk_array[_eden_chunk_index-1]) 4306 >= CMSSamplingGrain)) { 4307 _eden_chunk_index++; // commit sample 4308 } 4309 } 4310 } 4311 if ((_collectorState == AbortablePreclean) && !_abort_preclean) { 4312 size_t used = get_eden_used(); 4313 size_t capacity = get_eden_capacity(); 4314 assert(used <= capacity, "Unexpected state of Eden"); 4315 if (used > (capacity/100 * CMSScheduleRemarkEdenPenetration)) { 4316 _abort_preclean = true; 4317 } 4318 } 4319 } 4320 4321 4322 size_t CMSCollector::preclean_work(bool clean_refs, bool clean_survivor) { 4323 assert(_collectorState == Precleaning || 4324 _collectorState == AbortablePreclean, "incorrect state"); 4325 ResourceMark rm; 4326 HandleMark hm; 4327 // Do one pass of scrubbing the discovered reference lists 4328 // to remove any reference objects with strongly-reachable 4329 // referents. 4330 if (clean_refs) { 4331 ReferenceProcessor* rp = ref_processor(); 4332 CMSPrecleanRefsYieldClosure yield_cl(this); 4333 assert(rp->span().equals(_span), "Spans should be equal"); 4334 CMSKeepAliveClosure keep_alive(this, _span, &_markBitMap, 4335 &_markStack); 4336 CMSDrainMarkingStackClosure complete_trace(this, 4337 _span, &_markBitMap, &_markStack, 4338 &keep_alive); 4339 4340 // We don't want this step to interfere with a young 4341 // collection because we don't want to take CPU 4342 // or memory bandwidth away from the young GC threads 4343 // (which may be as many as there are CPUs). 4344 // Note that we don't need to protect ourselves from 4345 // interference with mutators because they can't 4346 // manipulate the discovered reference lists nor affect 4347 // the computed reachability of the referents, the 4348 // only properties manipulated by the precleaning 4349 // of these reference lists. 4350 stopTimer(); 4351 CMSTokenSyncWithLocks x(true /* is cms thread */, 4352 bitMapLock()); 4353 startTimer(); 4354 sample_eden(); 4355 // The following will yield to allow foreground 4356 // collection to proceed promptly. XXX YSR: 4357 // The code in this method may need further 4358 // tweaking for better performance and some restructuring 4359 // for cleaner interfaces. 
4360 rp->preclean_discovered_references( 4361 rp->is_alive_non_header(), &keep_alive, &complete_trace, 4362 &yield_cl); 4363 } 4364 4365 if (clean_survivor) { // preclean the active survivor space(s) 4366 assert(_young_gen->kind() == Generation::DefNew || 4367 _young_gen->kind() == Generation::ParNew || 4368 _young_gen->kind() == Generation::ASParNew, 4369 "incorrect type for cast"); 4370 DefNewGeneration* dng = (DefNewGeneration*)_young_gen; 4371 PushAndMarkClosure pam_cl(this, _span, ref_processor(), 4372 &_markBitMap, &_modUnionTable, 4373 &_markStack, &_revisitStack, 4374 true /* precleaning phase */); 4375 stopTimer(); 4376 CMSTokenSyncWithLocks ts(true /* is cms thread */, 4377 bitMapLock()); 4378 startTimer(); 4379 unsigned int before_count = 4380 GenCollectedHeap::heap()->total_collections(); 4381 SurvivorSpacePrecleanClosure 4382 sss_cl(this, _span, &_markBitMap, &_markStack, 4383 &pam_cl, before_count, CMSYield); 4384 dng->from()->object_iterate_careful(&sss_cl); 4385 dng->to()->object_iterate_careful(&sss_cl); 4386 } 4387 MarkRefsIntoAndScanClosure 4388 mrias_cl(_span, ref_processor(), &_markBitMap, &_modUnionTable, 4389 &_markStack, &_revisitStack, this, CMSYield, 4390 true /* precleaning phase */); 4391 // CAUTION: The following closure has persistent state that may need to 4392 // be reset upon a decrease in the sequence of addresses it 4393 // processes. 4394 ScanMarkedObjectsAgainCarefullyClosure 4395 smoac_cl(this, _span, 4396 &_markBitMap, &_markStack, &_revisitStack, &mrias_cl, CMSYield); 4397 4398 // Preclean dirty cards in ModUnionTable and CardTable using 4399 // appropriate convergence criterion; 4400 // repeat CMSPrecleanIter times unless we find that 4401 // we are losing. 4402 assert(CMSPrecleanIter < 10, "CMSPrecleanIter is too large"); 4403 assert(CMSPrecleanNumerator < CMSPrecleanDenominator, 4404 "Bad convergence multiplier"); 4405 assert(CMSPrecleanThreshold >= 100, 4406 "Unreasonably low CMSPrecleanThreshold"); 4407 4408 size_t numIter, cumNumCards, lastNumCards, curNumCards; 4409 for (numIter = 0, cumNumCards = lastNumCards = curNumCards = 0; 4410 numIter < CMSPrecleanIter; 4411 numIter++, lastNumCards = curNumCards, cumNumCards += curNumCards) { 4412 curNumCards = preclean_mod_union_table(_cmsGen, &smoac_cl); 4413 if (CMSPermGenPrecleaningEnabled) { 4414 curNumCards += preclean_mod_union_table(_permGen, &smoac_cl); 4415 } 4416 if (Verbose && PrintGCDetails) { 4417 gclog_or_tty->print(" (modUnionTable: %d cards)", curNumCards); 4418 } 4419 // Either there are very few dirty cards, so re-mark 4420 // pause will be small anyway, or our pre-cleaning isn't 4421 // that much faster than the rate at which cards are being 4422 // dirtied, so we might as well stop and re-mark since 4423 // precleaning won't improve our re-mark time by much. 
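  // Illustrative arithmetic for the convergence test below (example numbers
  // only): with CMSPrecleanNumerator = 2 and CMSPrecleanDenominator = 3 we
  // stop once curNumCards * 3 > lastNumCards * 2, i.e. once an iteration
  // finds more than 2/3 of the dirty cards the previous one did. For
  // instance, 900 cards after an iteration that found 1000 terminates
  // precleaning (2700 > 2000), whereas 500 after 1000 does not (1500 < 2000)
  // and we keep iterating, up to CMSPrecleanIter times; independently, the
  // first clause below stops as soon as no more than CMSPrecleanThreshold
  // dirty cards were found.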
4424 if (curNumCards <= CMSPrecleanThreshold ||
4425 (numIter > 0 &&
4426 (curNumCards * CMSPrecleanDenominator >
4427 lastNumCards * CMSPrecleanNumerator))) {
4428 numIter++;
4429 cumNumCards += curNumCards;
4430 break;
4431 }
4432 }
4433 curNumCards = preclean_card_table(_cmsGen, &smoac_cl);
4434 if (CMSPermGenPrecleaningEnabled) {
4435 curNumCards += preclean_card_table(_permGen, &smoac_cl);
4436 }
4437 cumNumCards += curNumCards;
4438 if (PrintGCDetails && PrintCMSStatistics != 0) {
4439 gclog_or_tty->print_cr(" (cardTable: %d cards, re-scanned %d cards, %d iterations)",
4440 curNumCards, cumNumCards, numIter);
4441 }
4442 return cumNumCards; // as a measure of useful work done
4443 }
4444
4445 // PRECLEANING NOTES:
4446 // Precleaning involves:
4447 // . reading the bits of the modUnionTable and clearing the set bits.
4448 // . For the cards corresponding to the set bits, we scan the
4449 // objects on those cards. This means we need the free_list_lock
4450 // so that we can safely iterate over the CMS space when scanning
4451 // for oops.
4452 // . When we scan the objects, we'll be both reading and setting
4453 // marks in the marking bit map, so we'll need the marking bit map.
4454 // . For protecting _collector_state transitions, we take the CGC_lock.
4455 // Note that any races in the reading of card table entries by the
4456 // CMS thread on the one hand and the clearing of those entries by the
4457 // VM thread or the setting of those entries by the mutator threads on the
4458 // other are quite benign. However, for efficiency it makes sense to keep
4459 // the VM thread from racing with the CMS thread while the latter is
4460 // transferring dirty card info to the modUnionTable. We therefore also use the
4461 // CGC_lock to protect the reading of the card table and the mod union
4462 // table by the CMS thread.
4463 // . We run concurrently with mutator updates, so scanning
4464 // needs to be done carefully -- we should not try to scan
4465 // potentially uninitialized objects.
4466 //
4467 // Locking strategy: While holding the CGC_lock, we scan over and
4468 // reset a maximal dirty range of the mod union / card tables, then lock
4469 // the free_list_lock and bitmap lock to do a full marking, then
4470 // release these locks; and repeat the cycle. This allows for a
4471 // certain amount of fairness in the sharing of these locks between
4472 // the CMS collector on the one hand, and the VM thread and the
4473 // mutators on the other.
4474
4475 // NOTE: preclean_mod_union_table() and preclean_card_table()
4476 // further below are largely identical; if you need to modify
4477 // one of these methods, please check the other method too.
4478
4479 size_t CMSCollector::preclean_mod_union_table(
4480 ConcurrentMarkSweepGeneration* gen,
4481 ScanMarkedObjectsAgainCarefullyClosure* cl) {
4482 verify_work_stacks_empty();
4483 verify_overflow_empty();
4484
4485 // strategy: starting with the first card, accumulate contiguous
4486 // ranges of dirty cards; clear these cards, then scan the region
4487 // covered by these cards.
4488
4489 // Since all of the MUT is committed ahead, we can just use
4490 // that, in case the generations expand while we are precleaning.
4491 // It might also be fine to just use the committed part of the
4492 // generation, but we might potentially miss cards when the
4493 // generation is rapidly expanding while we are in the midst
4494 // of precleaning.
4495 HeapWord* startAddr = gen->reserved().start(); 4496 HeapWord* endAddr = gen->reserved().end(); 4497 4498 cl->setFreelistLock(gen->freelistLock()); // needed for yielding 4499 4500 size_t numDirtyCards, cumNumDirtyCards; 4501 HeapWord *nextAddr, *lastAddr; 4502 for (cumNumDirtyCards = numDirtyCards = 0, 4503 nextAddr = lastAddr = startAddr; 4504 nextAddr < endAddr; 4505 nextAddr = lastAddr, cumNumDirtyCards += numDirtyCards) { 4506 4507 ResourceMark rm; 4508 HandleMark hm; 4509 4510 MemRegion dirtyRegion; 4511 { 4512 stopTimer(); 4513 CMSTokenSync ts(true); 4514 startTimer(); 4515 sample_eden(); 4516 // Get dirty region starting at nextOffset (inclusive), 4517 // simultaneously clearing it. 4518 dirtyRegion = 4519 _modUnionTable.getAndClearMarkedRegion(nextAddr, endAddr); 4520 assert(dirtyRegion.start() >= nextAddr, 4521 "returned region inconsistent?"); 4522 } 4523 // Remember where the next search should begin. 4524 // The returned region (if non-empty) is a right open interval, 4525 // so lastOffset is obtained from the right end of that 4526 // interval. 4527 lastAddr = dirtyRegion.end(); 4528 // Should do something more transparent and less hacky XXX 4529 numDirtyCards = 4530 _modUnionTable.heapWordDiffToOffsetDiff(dirtyRegion.word_size()); 4531 4532 // We'll scan the cards in the dirty region (with periodic 4533 // yields for foreground GC as needed). 4534 if (!dirtyRegion.is_empty()) { 4535 assert(numDirtyCards > 0, "consistency check"); 4536 HeapWord* stop_point = NULL; 4537 { 4538 stopTimer(); 4539 CMSTokenSyncWithLocks ts(true, gen->freelistLock(), 4540 bitMapLock()); 4541 startTimer(); 4542 verify_work_stacks_empty(); 4543 verify_overflow_empty(); 4544 sample_eden(); 4545 stop_point = 4546 gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl); 4547 } 4548 if (stop_point != NULL) { 4549 // The careful iteration stopped early either because it found an 4550 // uninitialized object, or because we were in the midst of an 4551 // "abortable preclean", which should now be aborted. Redirty 4552 // the bits corresponding to the partially-scanned or unscanned 4553 // cards. We'll either restart at the next block boundary or 4554 // abort the preclean. 4555 assert((CMSPermGenPrecleaningEnabled && (gen == _permGen)) || 4556 (_collectorState == AbortablePreclean && should_abort_preclean()), 4557 "Unparsable objects should only be in perm gen."); 4558 4559 stopTimer(); 4560 CMSTokenSyncWithLocks ts(true, bitMapLock()); 4561 startTimer(); 4562 _modUnionTable.mark_range(MemRegion(stop_point, dirtyRegion.end())); 4563 if (should_abort_preclean()) { 4564 break; // out of preclean loop 4565 } else { 4566 // Compute the next address at which preclean should pick up; 4567 // might need bitMapLock in order to read P-bits. 4568 lastAddr = next_card_start_after_block(stop_point); 4569 } 4570 } 4571 } else { 4572 assert(lastAddr == endAddr, "consistency check"); 4573 assert(numDirtyCards == 0, "consistency check"); 4574 break; 4575 } 4576 } 4577 verify_work_stacks_empty(); 4578 verify_overflow_empty(); 4579 return cumNumDirtyCards; 4580 } 4581 4582 // NOTE: preclean_mod_union_table() above and preclean_card_table() 4583 // below are largely identical; if you need to modify 4584 // one of these methods, please check the other method too. 
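// Their common structure is, in outline (a simplified sketch only -- the
// token/lock juggling, timer handling and Eden sampling are elided, and the
// helper names below are purely illustrative):
//
//   cumNumDirtyCards = 0;
//   for (nextAddr = startAddr; nextAddr < endAddr; nextAddr = lastAddr) {
//     dirtyRegion = get_and_reset_dirty_range(nextAddr, endAddr); // under CMS token
//     lastAddr    = dirtyRegion.end();
//     if (dirtyRegion.is_empty()) break;                 // no dirty cards left
//     cumNumDirtyCards += cards_in(dirtyRegion);
//     stop_point = scan_objects_carefully(dirtyRegion);  // freelist + bitmap locks
//     if (stop_point != NULL) {                          // hit an uninitialized object
//       redirty(stop_point, dirtyRegion.end());          // let the remark rescan it
//       if (should_abort_preclean()) break;
//       lastAddr = next_card_start_after_block(stop_point);
//     }
//   }
//   return cumNumDirtyCards;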
4585
4586 size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* gen,
4587 ScanMarkedObjectsAgainCarefullyClosure* cl) {
4588 // strategy: it's similar to preclean_mod_union_table() above, in that
4589 // we accumulate contiguous ranges of dirty cards, mark these cards
4590 // precleaned, then scan the region covered by these cards.
4591 HeapWord* endAddr = (HeapWord*)(gen->_virtual_space.high());
4592 HeapWord* startAddr = (HeapWord*)(gen->_virtual_space.low());
4593
4594 cl->setFreelistLock(gen->freelistLock()); // needed for yielding
4595
4596 size_t numDirtyCards, cumNumDirtyCards;
4597 HeapWord *lastAddr, *nextAddr;
4598
4599 for (cumNumDirtyCards = numDirtyCards = 0,
4600 nextAddr = lastAddr = startAddr;
4601 nextAddr < endAddr;
4602 nextAddr = lastAddr, cumNumDirtyCards += numDirtyCards) {
4603
4604 ResourceMark rm;
4605 HandleMark hm;
4606
4607 MemRegion dirtyRegion;
4608 {
4609 // See comments in "Precleaning notes" above on why we
4610 // do this locking. XXX Could the locking overheads be
4611 // too high when dirty cards are sparse? [I don't think so.]
4612 stopTimer();
4613 CMSTokenSync x(true); // is cms thread
4614 startTimer();
4615 sample_eden();
4616 // Get and clear dirty region from card table
4617 dirtyRegion = _ct->ct_bs()->dirty_card_range_after_preclean(
4618 MemRegion(nextAddr, endAddr));
4619 assert(dirtyRegion.start() >= nextAddr,
4620 "returned region inconsistent?");
4621 }
4622 lastAddr = dirtyRegion.end();
4623 numDirtyCards =
4624 dirtyRegion.word_size()/CardTableModRefBS::card_size_in_words;
4625
4626 if (!dirtyRegion.is_empty()) {
4627 stopTimer();
4628 CMSTokenSyncWithLocks ts(true, gen->freelistLock(), bitMapLock());
4629 startTimer();
4630 sample_eden();
4631 verify_work_stacks_empty();
4632 verify_overflow_empty();
4633 HeapWord* stop_point =
4634 gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl);
4635 if (stop_point != NULL) {
4636 // The careful iteration stopped early because it found an
4637 // uninitialized object. Redirty the bits corresponding to the
4638 // partially-scanned or unscanned cards, and start again at the
4639 // next block boundary.
4640 assert(CMSPermGenPrecleaningEnabled ||
4641 (_collectorState == AbortablePreclean && should_abort_preclean()),
4642 "Unparsable objects should only be in perm gen.");
4643 _ct->ct_bs()->invalidate(MemRegion(stop_point, dirtyRegion.end()));
4644 if (should_abort_preclean()) {
4645 break; // out of preclean loop
4646 } else {
4647 // Compute the next address at which preclean should pick up.
4648 lastAddr = next_card_start_after_block(stop_point); 4649 } 4650 } 4651 } else { 4652 break; 4653 } 4654 } 4655 verify_work_stacks_empty(); 4656 verify_overflow_empty(); 4657 return cumNumDirtyCards; 4658 } 4659 4660 void CMSCollector::checkpointRootsFinal(bool asynch, 4661 bool clear_all_soft_refs, bool init_mark_was_synchronous) { 4662 assert(_collectorState == FinalMarking, "incorrect state transition?"); 4663 check_correct_thread_executing(); 4664 // world is stopped at this checkpoint 4665 assert(SafepointSynchronize::is_at_safepoint(), 4666 "world should be stopped"); 4667 verify_work_stacks_empty(); 4668 verify_overflow_empty(); 4669 4670 SpecializationStats::clear(); 4671 if (PrintGCDetails) { 4672 gclog_or_tty->print("[YG occupancy: "SIZE_FORMAT" K ("SIZE_FORMAT" K)]", 4673 _young_gen->used() / K, 4674 _young_gen->capacity() / K); 4675 } 4676 if (asynch) { 4677 if (CMSScavengeBeforeRemark) { 4678 GenCollectedHeap* gch = GenCollectedHeap::heap(); 4679 // Temporarily set flag to false, GCH->do_collection will 4680 // expect it to be false and set to true 4681 FlagSetting fl(gch->_is_gc_active, false); 4682 NOT_PRODUCT(TraceTime t("Scavenge-Before-Remark", 4683 PrintGCDetails && Verbose, true, gclog_or_tty);) 4684 int level = _cmsGen->level() - 1; 4685 if (level >= 0) { 4686 gch->do_collection(true, // full (i.e. force, see below) 4687 false, // !clear_all_soft_refs 4688 0, // size 4689 false, // is_tlab 4690 level // max_level 4691 ); 4692 } 4693 } 4694 FreelistLocker x(this); 4695 MutexLockerEx y(bitMapLock(), 4696 Mutex::_no_safepoint_check_flag); 4697 assert(!init_mark_was_synchronous, "but that's impossible!"); 4698 checkpointRootsFinalWork(asynch, clear_all_soft_refs, false); 4699 } else { 4700 // already have all the locks 4701 checkpointRootsFinalWork(asynch, clear_all_soft_refs, 4702 init_mark_was_synchronous); 4703 } 4704 verify_work_stacks_empty(); 4705 verify_overflow_empty(); 4706 SpecializationStats::print(); 4707 } 4708 4709 void CMSCollector::checkpointRootsFinalWork(bool asynch, 4710 bool clear_all_soft_refs, bool init_mark_was_synchronous) { 4711 4712 NOT_PRODUCT(TraceTime tr("checkpointRootsFinalWork", PrintGCDetails, false, gclog_or_tty);) 4713 4714 assert(haveFreelistLocks(), "must have free list locks"); 4715 assert_lock_strong(bitMapLock()); 4716 4717 if (UseAdaptiveSizePolicy) { 4718 size_policy()->checkpoint_roots_final_begin(); 4719 } 4720 4721 ResourceMark rm; 4722 HandleMark hm; 4723 4724 GenCollectedHeap* gch = GenCollectedHeap::heap(); 4725 4726 if (cms_should_unload_classes()) { 4727 CodeCache::gc_prologue(); 4728 } 4729 assert(haveFreelistLocks(), "must have free list locks"); 4730 assert_lock_strong(bitMapLock()); 4731 4732 if (!init_mark_was_synchronous) { 4733 // We might assume that we need not fill TLAB's when 4734 // CMSScavengeBeforeRemark is set, because we may have just done 4735 // a scavenge which would have filled all TLAB's -- and besides 4736 // Eden would be empty. This however may not always be the case -- 4737 // for instance although we asked for a scavenge, it may not have 4738 // happened because of a JNI critical section. We probably need 4739 // a policy for deciding whether we can in that case wait until 4740 // the critical section releases and then do the remark following 4741 // the scavenge, and skip it here. 
In the absence of that policy, 4742 // or of an indication of whether the scavenge did indeed occur, 4743 // we cannot rely on TLAB's having been filled and must do 4744 // so here just in case a scavenge did not happen. 4745 gch->ensure_parsability(false); // fill TLAB's, but no need to retire them 4746 // Update the saved marks which may affect the root scans. 4747 gch->save_marks(); 4748 4749 { 4750 COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;) 4751 4752 // Note on the role of the mod union table: 4753 // Since the marker in "markFromRoots" marks concurrently with 4754 // mutators, it is possible for some reachable objects not to have been 4755 // scanned. For instance, an only reference to an object A was 4756 // placed in object B after the marker scanned B. Unless B is rescanned, 4757 // A would be collected. Such updates to references in marked objects 4758 // are detected via the mod union table which is the set of all cards 4759 // dirtied since the first checkpoint in this GC cycle and prior to 4760 // the most recent young generation GC, minus those cleaned up by the 4761 // concurrent precleaning. 4762 if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) { 4763 TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty); 4764 do_remark_parallel(); 4765 } else { 4766 TraceTime t("Rescan (non-parallel) ", PrintGCDetails, false, 4767 gclog_or_tty); 4768 do_remark_non_parallel(); 4769 } 4770 } 4771 } else { 4772 assert(!asynch, "Can't have init_mark_was_synchronous in asynch mode"); 4773 // The initial mark was stop-world, so there's no rescanning to 4774 // do; go straight on to the next step below. 4775 } 4776 verify_work_stacks_empty(); 4777 verify_overflow_empty(); 4778 4779 { 4780 NOT_PRODUCT(TraceTime ts("refProcessingWork", PrintGCDetails, false, gclog_or_tty);) 4781 refProcessingWork(asynch, clear_all_soft_refs); 4782 } 4783 verify_work_stacks_empty(); 4784 verify_overflow_empty(); 4785 4786 if (cms_should_unload_classes()) { 4787 CodeCache::gc_epilogue(); 4788 } 4789 4790 // If we encountered any (marking stack / work queue) overflow 4791 // events during the current CMS cycle, take appropriate 4792 // remedial measures, where possible, so as to try and avoid 4793 // recurrence of that condition. 
4794 assert(_markStack.isEmpty(), "No grey objects"); 4795 size_t ser_ovflw = _ser_pmc_remark_ovflw + _ser_pmc_preclean_ovflw + 4796 _ser_kac_ovflw; 4797 if (ser_ovflw > 0) { 4798 if (PrintCMSStatistics != 0) { 4799 gclog_or_tty->print_cr("Marking stack overflow (benign) " 4800 "(pmc_pc="SIZE_FORMAT", pmc_rm="SIZE_FORMAT", kac="SIZE_FORMAT")", 4801 _ser_pmc_preclean_ovflw, _ser_pmc_remark_ovflw, 4802 _ser_kac_ovflw); 4803 } 4804 _markStack.expand(); 4805 _ser_pmc_remark_ovflw = 0; 4806 _ser_pmc_preclean_ovflw = 0; 4807 _ser_kac_ovflw = 0; 4808 } 4809 if (_par_pmc_remark_ovflw > 0 || _par_kac_ovflw > 0) { 4810 if (PrintCMSStatistics != 0) { 4811 gclog_or_tty->print_cr("Work queue overflow (benign) " 4812 "(pmc_rm="SIZE_FORMAT", kac="SIZE_FORMAT")", 4813 _par_pmc_remark_ovflw, _par_kac_ovflw); 4814 } 4815 _par_pmc_remark_ovflw = 0; 4816 _par_kac_ovflw = 0; 4817 } 4818 if (PrintCMSStatistics != 0) { 4819 if (_markStack._hit_limit > 0) { 4820 gclog_or_tty->print_cr(" (benign) Hit max stack size limit ("SIZE_FORMAT")", 4821 _markStack._hit_limit); 4822 } 4823 if (_markStack._failed_double > 0) { 4824 gclog_or_tty->print_cr(" (benign) Failed stack doubling ("SIZE_FORMAT")," 4825 " current capacity "SIZE_FORMAT, 4826 _markStack._failed_double, 4827 _markStack.capacity()); 4828 } 4829 } 4830 _markStack._hit_limit = 0; 4831 _markStack._failed_double = 0; 4832 4833 if ((VerifyAfterGC || VerifyDuringGC) && 4834 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) { 4835 verify_after_remark(); 4836 } 4837 4838 // Change under the freelistLocks. 4839 _collectorState = Sweeping; 4840 // Call isAllClear() under bitMapLock 4841 assert(_modUnionTable.isAllClear(), "Should be clear by end of the" 4842 " final marking"); 4843 if (UseAdaptiveSizePolicy) { 4844 size_policy()->checkpoint_roots_final_end(gch->gc_cause()); 4845 } 4846 } 4847 4848 // Parallel remark task 4849 class CMSParRemarkTask: public AbstractGangTask { 4850 CMSCollector* _collector; 4851 WorkGang* _workers; 4852 int _n_workers; 4853 CompactibleFreeListSpace* _cms_space; 4854 CompactibleFreeListSpace* _perm_space; 4855 4856 // The per-thread work queues, available here for stealing. 4857 OopTaskQueueSet* _task_queues; 4858 ParallelTaskTerminator _term; 4859 4860 public: 4861 CMSParRemarkTask(CMSCollector* collector, 4862 CompactibleFreeListSpace* cms_space, 4863 CompactibleFreeListSpace* perm_space, 4864 int n_workers, WorkGang* workers, 4865 OopTaskQueueSet* task_queues): 4866 AbstractGangTask("Rescan roots and grey objects in parallel"), 4867 _collector(collector), 4868 _cms_space(cms_space), _perm_space(perm_space), 4869 _n_workers(n_workers), 4870 _workers(workers), 4871 _task_queues(task_queues), 4872 _term(workers->total_workers(), task_queues) { } 4873 4874 OopTaskQueueSet* task_queues() { return _task_queues; } 4875 4876 OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); } 4877 4878 ParallelTaskTerminator* terminator() { return &_term; } 4879 4880 void work(int i); 4881 4882 private: 4883 // Work method in support of parallel rescan ... of young gen spaces 4884 void do_young_space_rescan(int i, Par_MarkRefsIntoAndScanClosure* cl, 4885 ContiguousSpace* space, 4886 HeapWord** chunk_array, size_t chunk_top); 4887 4888 // ... of dirty cards in old space 4889 void do_dirty_card_rescan_tasks(CompactibleFreeListSpace* sp, int i, 4890 Par_MarkRefsIntoAndScanClosure* cl); 4891 4892 // ... 
work stealing for the above 4893 void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed); 4894 }; 4895 4896 void CMSParRemarkTask::work(int i) { 4897 elapsedTimer _timer; 4898 ResourceMark rm; 4899 HandleMark hm; 4900 4901 // ---------- rescan from roots -------------- 4902 _timer.start(); 4903 GenCollectedHeap* gch = GenCollectedHeap::heap(); 4904 Par_MarkRefsIntoAndScanClosure par_mrias_cl(_collector, 4905 _collector->_span, _collector->ref_processor(), 4906 &(_collector->_markBitMap), 4907 work_queue(i), &(_collector->_revisitStack)); 4908 4909 // Rescan young gen roots first since these are likely 4910 // coarsely partitioned and may, on that account, constitute 4911 // the critical path; thus, it's best to start off that 4912 // work first. 4913 // ---------- young gen roots -------------- 4914 { 4915 DefNewGeneration* dng = _collector->_young_gen->as_DefNewGeneration(); 4916 EdenSpace* eden_space = dng->eden(); 4917 ContiguousSpace* from_space = dng->from(); 4918 ContiguousSpace* to_space = dng->to(); 4919 4920 HeapWord** eca = _collector->_eden_chunk_array; 4921 size_t ect = _collector->_eden_chunk_index; 4922 HeapWord** sca = _collector->_survivor_chunk_array; 4923 size_t sct = _collector->_survivor_chunk_index; 4924 4925 assert(ect <= _collector->_eden_chunk_capacity, "out of bounds"); 4926 assert(sct <= _collector->_survivor_chunk_capacity, "out of bounds"); 4927 4928 do_young_space_rescan(i, &par_mrias_cl, to_space, NULL, 0); 4929 do_young_space_rescan(i, &par_mrias_cl, from_space, sca, sct); 4930 do_young_space_rescan(i, &par_mrias_cl, eden_space, eca, ect); 4931 4932 _timer.stop(); 4933 if (PrintCMSStatistics != 0) { 4934 gclog_or_tty->print_cr( 4935 "Finished young gen rescan work in %dth thread: %3.3f sec", 4936 i, _timer.seconds()); 4937 } 4938 } 4939 4940 // ---------- remaining roots -------------- 4941 _timer.reset(); 4942 _timer.start(); 4943 gch->gen_process_strong_roots(_collector->_cmsGen->level(), 4944 false, // yg was scanned above 4945 true, // collecting perm gen 4946 SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()), 4947 NULL, &par_mrias_cl); 4948 _timer.stop(); 4949 if (PrintCMSStatistics != 0) { 4950 gclog_or_tty->print_cr( 4951 "Finished remaining root rescan work in %dth thread: %3.3f sec", 4952 i, _timer.seconds()); 4953 } 4954 4955 // ---------- rescan dirty cards ------------ 4956 _timer.reset(); 4957 _timer.start(); 4958 4959 // Do the rescan tasks for each of the two spaces 4960 // (cms_space and perm_space) in turn. 4961 do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl); 4962 do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl); 4963 _timer.stop(); 4964 if (PrintCMSStatistics != 0) { 4965 gclog_or_tty->print_cr( 4966 "Finished dirty card rescan work in %dth thread: %3.3f sec", 4967 i, _timer.seconds()); 4968 } 4969 4970 // ---------- steal work from other threads ... 4971 // ---------- ... and drain overflow list. 4972 _timer.reset(); 4973 _timer.start(); 4974 do_work_steal(i, &par_mrias_cl, _collector->hash_seed(i)); 4975 _timer.stop(); 4976 if (PrintCMSStatistics != 0) { 4977 gclog_or_tty->print_cr( 4978 "Finished work stealing in %dth thread: %3.3f sec", 4979 i, _timer.seconds()); 4980 } 4981 } 4982 4983 void 4984 CMSParRemarkTask::do_young_space_rescan(int i, 4985 Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space, 4986 HeapWord** chunk_array, size_t chunk_top) { 4987 // Until all tasks completed: 4988 // . claim an unclaimed task 4989 // . 
compute region boundaries corresponding to task claimed 4990 // using chunk_array 4991 // . par_oop_iterate(cl) over that region 4992 4993 ResourceMark rm; 4994 HandleMark hm; 4995 4996 SequentialSubTasksDone* pst = space->par_seq_tasks(); 4997 assert(pst->valid(), "Uninitialized use?"); 4998 4999 int nth_task = 0; 5000 int n_tasks = pst->n_tasks(); 5001 5002 HeapWord *start, *end; 5003 while (!pst->is_task_claimed(/* reference */ nth_task)) { 5004 // We claimed task # nth_task; compute its boundaries. 5005 if (chunk_top == 0) { // no samples were taken 5006 assert(nth_task == 0 && n_tasks == 1, "Can have only 1 EdenSpace task"); 5007 start = space->bottom(); 5008 end = space->top(); 5009 } else if (nth_task == 0) { 5010 start = space->bottom(); 5011 end = chunk_array[nth_task]; 5012 } else if (nth_task < (jint)chunk_top) { 5013 assert(nth_task >= 1, "Control point invariant"); 5014 start = chunk_array[nth_task - 1]; 5015 end = chunk_array[nth_task]; 5016 } else { 5017 assert(nth_task == (jint)chunk_top, "Control point invariant"); 5018 start = chunk_array[chunk_top - 1]; 5019 end = space->top(); 5020 } 5021 MemRegion mr(start, end); 5022 // Verify that mr is in space 5023 assert(mr.is_empty() || space->used_region().contains(mr), 5024 "Should be in space"); 5025 // Verify that "start" is an object boundary 5026 assert(mr.is_empty() || oop(mr.start())->is_oop(), 5027 "Should be an oop"); 5028 space->par_oop_iterate(mr, cl); 5029 } 5030 pst->all_tasks_completed(); 5031 } 5032 5033 void 5034 CMSParRemarkTask::do_dirty_card_rescan_tasks( 5035 CompactibleFreeListSpace* sp, int i, 5036 Par_MarkRefsIntoAndScanClosure* cl) { 5037 // Until all tasks completed: 5038 // . claim an unclaimed task 5039 // . compute region boundaries corresponding to task claimed 5040 // . transfer dirty bits ct->mut for that region 5041 // . apply rescanclosure to dirty mut bits for that region 5042 5043 ResourceMark rm; 5044 HandleMark hm; 5045 5046 OopTaskQueue* work_q = work_queue(i); 5047 ModUnionClosure modUnionClosure(&(_collector->_modUnionTable)); 5048 // CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! 5049 // CAUTION: This closure has state that persists across calls to 5050 // the work method dirty_range_iterate_clear() in that it has 5051 // imbedded in it a (subtype of) UpwardsObjectClosure. The 5052 // use of that state in the imbedded UpwardsObjectClosure instance 5053 // assumes that the cards are always iterated (even if in parallel 5054 // by several threads) in monotonically increasing order per each 5055 // thread. This is true of the implementation below which picks 5056 // card ranges (chunks) in monotonically increasing order globally 5057 // and, a-fortiori, in monotonically increasing order per thread 5058 // (the latter order being a subsequence of the former). 5059 // If the work code below is ever reorganized into a more chaotic 5060 // work-partitioning form than the current "sequential tasks" 5061 // paradigm, the use of that persistent state will have to be 5062 // revisited and modified appropriately. See also related 5063 // bug 4756801 work on which should examine this code to make 5064 // sure that the changes there do not run counter to the 5065 // assumptions made here and necessary for correctness and 5066 // efficiency. Note also that this code might yield inefficient 5067 // behaviour in the case of very large objects that span one or 5068 // more work chunks. Such objects would potentially be scanned 5069 // several times redundantly. 
Work on 4756801 should try and
5070 // address that performance anomaly if at all possible. XXX
5071 MemRegion full_span = _collector->_span;
5072 CMSBitMap* bm = &(_collector->_markBitMap); // shared
5073 CMSMarkStack* rs = &(_collector->_revisitStack); // shared
5074 MarkFromDirtyCardsClosure
5075 greyRescanClosure(_collector, full_span, // entire span of interest
5076 sp, bm, work_q, rs, cl);
5077
5078 SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
5079 assert(pst->valid(), "Uninitialized use?");
5080 int nth_task = 0;
5081 const int alignment = CardTableModRefBS::card_size * BitsPerWord;
5082 MemRegion span = sp->used_region();
5083 HeapWord* start_addr = span.start();
5084 HeapWord* end_addr = (HeapWord*)round_to((intptr_t)span.end(),
5085 alignment);
5086 const size_t chunk_size = sp->rescan_task_size(); // in HeapWord units
5087 assert((HeapWord*)round_to((intptr_t)start_addr, alignment) ==
5088 start_addr, "Check alignment");
5089 assert((size_t)round_to((intptr_t)chunk_size, alignment) ==
5090 chunk_size, "Check alignment");
5091
5092 while (!pst->is_task_claimed(/* reference */ nth_task)) {
5093 // Having claimed the nth_task, compute corresponding mem-region,
5094 // which is a-fortiori aligned correctly (i.e. at a MUT boundary).
5095 // The alignment restriction ensures that we do not need any
5096 // synchronization with other gang-workers while setting or
5097 // clearing bits in this chunk of the MUT.
5098 MemRegion this_span = MemRegion(start_addr + nth_task*chunk_size,
5099 start_addr + (nth_task+1)*chunk_size);
5100 // The last chunk's end might be way beyond end of the
5101 // used region. In that case pull back appropriately.
5102 if (this_span.end() > end_addr) {
5103 this_span.set_end(end_addr);
5104 assert(!this_span.is_empty(), "Program logic (calculation of n_tasks)");
5105 }
5106 // Iterate over the dirty cards covering this chunk, marking them
5107 // precleaned, and setting the corresponding bits in the mod union
5108 // table. Since we have been careful to partition at Card and MUT-word
5109 // boundaries no synchronization is needed between parallel threads.
5110 _collector->_ct->ct_bs()->dirty_card_iterate(this_span,
5111 &modUnionClosure);
5112
5113 // Having transferred these marks into the modUnionTable,
5114 // rescan the marked objects on the dirty cards in the modUnionTable.
5115 // Even if this is at a synchronous collection, the initial marking
5116 // may have been done during an asynchronous collection so there
5117 // may be dirty bits in the mod-union table.
5118 _collector->_modUnionTable.dirty_range_iterate_clear(
5119 this_span, &greyRescanClosure);
5120 _collector->_modUnionTable.verifyNoOneBitsInRange(
5121 this_span.start(),
5122 this_span.end());
5123 }
5124 pst->all_tasks_completed(); // declare that i am done
5125 }
5126
5127 // . see if we can share work_queues with ParNew?
XXX 5128 void 5129 CMSParRemarkTask::do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, 5130 int* seed) { 5131 OopTaskQueue* work_q = work_queue(i); 5132 NOT_PRODUCT(int num_steals = 0;) 5133 oop obj_to_scan; 5134 CMSBitMap* bm = &(_collector->_markBitMap); 5135 size_t num_from_overflow_list = 5136 MIN2((size_t)work_q->max_elems()/4, 5137 (size_t)ParGCDesiredObjsFromOverflowList); 5138 5139 while (true) { 5140 // Completely finish any left over work from (an) earlier round(s) 5141 cl->trim_queue(0); 5142 // Now check if there's any work in the overflow list 5143 if (_collector->par_take_from_overflow_list(num_from_overflow_list, 5144 work_q)) { 5145 // found something in global overflow list; 5146 // not yet ready to go stealing work from others. 5147 // We'd like to assert(work_q->size() != 0, ...) 5148 // because we just took work from the overflow list, 5149 // but of course we can't since all of that could have 5150 // been already stolen from us. 5151 // "He giveth and He taketh away." 5152 continue; 5153 } 5154 // Verify that we have no work before we resort to stealing 5155 assert(work_q->size() == 0, "Have work, shouldn't steal"); 5156 // Try to steal from other queues that have work 5157 if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) { 5158 NOT_PRODUCT(num_steals++;) 5159 assert(obj_to_scan->is_oop(), "Oops, not an oop!"); 5160 assert(bm->isMarked((HeapWord*)obj_to_scan), "Stole an unmarked oop?"); 5161 // Do scanning work 5162 obj_to_scan->oop_iterate(cl); 5163 // Loop around, finish this work, and try to steal some more 5164 } else if (terminator()->offer_termination()) { 5165 break; // nirvana from the infinite cycle 5166 } 5167 } 5168 NOT_PRODUCT( 5169 if (PrintCMSStatistics != 0) { 5170 gclog_or_tty->print("\n\t(%d: stole %d oops)", i, num_steals); 5171 } 5172 ) 5173 assert(work_q->size() == 0 && _collector->overflow_list_is_empty(), 5174 "Else our work is not yet done"); 5175 } 5176 5177 // Return a thread-local PLAB recording array, as appropriate. 5178 void* CMSCollector::get_data_recorder(int thr_num) { 5179 if (_survivor_plab_array != NULL && 5180 (CMSPLABRecordAlways || 5181 (_collectorState > Marking && _collectorState < FinalMarking))) { 5182 assert(thr_num < (int)ParallelGCThreads, "thr_num is out of bounds"); 5183 ChunkArray* ca = &_survivor_plab_array[thr_num]; 5184 ca->reset(); // clear it so that fresh data is recorded 5185 return (void*) ca; 5186 } else { 5187 return NULL; 5188 } 5189 } 5190 5191 // Reset all the thread-local PLAB recording arrays 5192 void CMSCollector::reset_survivor_plab_arrays() { 5193 for (uint i = 0; i < ParallelGCThreads; i++) { 5194 _survivor_plab_array[i].reset(); 5195 } 5196 } 5197 5198 // Merge the per-thread plab arrays into the global survivor chunk 5199 // array which will provide the partitioning of the survivor space 5200 // for CMS rescan. 
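// The merge below is essentially a k-way merge of ParallelGCThreads sorted
// arrays, repeatedly taking the smallest not-yet-consumed address across all
// of them. A minimal stand-alone sketch of the same idea (for illustration
// only; std::vector stands in for the ChunkArray/_cursor machinery used by
// the real code that follows):
//
//   // merged receives the union of all per-thread samples, in ascending order
//   void k_way_merge(const std::vector<std::vector<HeapWord*> >& per_thread,
//                    std::vector<HeapWord*>& merged) {
//     std::vector<size_t> cursor(per_thread.size(), 0);
//     for (;;) {
//       HeapWord* min_val = NULL;
//       size_t    min_tid = 0;
//       for (size_t j = 0; j < per_thread.size(); j++) {
//         if (cursor[j] == per_thread[j].size()) continue;  // array j exhausted
//         HeapWord* v = per_thread[j][cursor[j]];
//         if (min_val == NULL || v < min_val) { min_val = v; min_tid = j; }
//       }
//       if (min_val == NULL) break;        // every array exhausted
//       merged.push_back(min_val);         // commit this round's minimum
//       cursor[min_tid]++;
//     }
//   }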
5201 void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) { 5202 assert(_survivor_plab_array != NULL, "Error"); 5203 assert(_survivor_chunk_array != NULL, "Error"); 5204 assert(_collectorState == FinalMarking, "Error"); 5205 for (uint j = 0; j < ParallelGCThreads; j++) { 5206 _cursor[j] = 0; 5207 } 5208 HeapWord* top = surv->top(); 5209 size_t i; 5210 for (i = 0; i < _survivor_chunk_capacity; i++) { // all sca entries 5211 HeapWord* min_val = top; // Higher than any PLAB address 5212 uint min_tid = 0; // position of min_val this round 5213 for (uint j = 0; j < ParallelGCThreads; j++) { 5214 ChunkArray* cur_sca = &_survivor_plab_array[j]; 5215 if (_cursor[j] == cur_sca->end()) { 5216 continue; 5217 } 5218 assert(_cursor[j] < cur_sca->end(), "ctl pt invariant"); 5219 HeapWord* cur_val = cur_sca->nth(_cursor[j]); 5220 assert(surv->used_region().contains(cur_val), "Out of bounds value"); 5221 if (cur_val < min_val) { 5222 min_tid = j; 5223 min_val = cur_val; 5224 } else { 5225 assert(cur_val < top, "All recorded addresses should be less"); 5226 } 5227 } 5228 // At this point min_val and min_tid are respectively 5229 // the least address in _survivor_plab_array[j]->nth(_cursor[j]) 5230 // and the thread (j) that witnesses that address. 5231 // We record this address in the _survivor_chunk_array[i] 5232 // and increment _cursor[min_tid] prior to the next round i. 5233 if (min_val == top) { 5234 break; 5235 } 5236 _survivor_chunk_array[i] = min_val; 5237 _cursor[min_tid]++; 5238 } 5239 // We are all done; record the size of the _survivor_chunk_array 5240 _survivor_chunk_index = i; // exclusive: [0, i) 5241 if (PrintCMSStatistics > 0) { 5242 gclog_or_tty->print(" (Survivor:" SIZE_FORMAT "chunks) ", i); 5243 } 5244 // Verify that we used up all the recorded entries 5245 #ifdef ASSERT 5246 size_t total = 0; 5247 for (uint j = 0; j < ParallelGCThreads; j++) { 5248 assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant"); 5249 total += _cursor[j]; 5250 } 5251 assert(total == _survivor_chunk_index, "Ctl Pt Invariant"); 5252 // Check that the merged array is in sorted order 5253 if (total > 0) { 5254 for (size_t i = 0; i < total - 1; i++) { 5255 if (PrintCMSStatistics > 0) { 5256 gclog_or_tty->print(" (chunk" SIZE_FORMAT ":" INTPTR_FORMAT ") ", 5257 i, _survivor_chunk_array[i]); 5258 } 5259 assert(_survivor_chunk_array[i] < _survivor_chunk_array[i+1], 5260 "Not sorted"); 5261 } 5262 } 5263 #endif // ASSERT 5264 } 5265 5266 // Set up the space's par_seq_tasks structure for work claiming 5267 // for parallel rescan of young gen. 5268 // See ParRescanTask where this is currently used. 5269 void 5270 CMSCollector:: 5271 initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) { 5272 assert(n_threads > 0, "Unexpected n_threads argument"); 5273 DefNewGeneration* dng = (DefNewGeneration*)_young_gen; 5274 5275 // Eden space 5276 { 5277 SequentialSubTasksDone* pst = dng->eden()->par_seq_tasks(); 5278 assert(!pst->valid(), "Clobbering existing data?"); 5279 // Each valid entry in [0, _eden_chunk_index) represents a task. 
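// (The sampled addresses are chunk boundaries, so they induce one more task
// than there are entries: e.g. with _eden_chunk_index == 3 and boundaries
// c0 < c1 < c2, the four eden rescan tasks are
//   [bottom, c0), [c0, c1), [c1, c2), [c2, top).
// With no samples at all there is a single task covering [bottom, top);
// compare do_young_space_rescan() above.)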
5280 size_t n_tasks = _eden_chunk_index + 1;
5281 assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
5282 pst->set_par_threads(n_threads);
5283 pst->set_n_tasks((int)n_tasks);
5284 }
5285
5286 // Merge the survivor plab arrays into _survivor_chunk_array
5287 if (_survivor_plab_array != NULL) {
5288 merge_survivor_plab_arrays(dng->from());
5289 } else {
5290 assert(_survivor_chunk_index == 0, "Error");
5291 }
5292
5293 // To space
5294 {
5295 SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
5296 assert(!pst->valid(), "Clobbering existing data?");
5297 pst->set_par_threads(n_threads);
5298 pst->set_n_tasks(1);
5299 assert(pst->valid(), "Error");
5300 }
5301
5302 // From space
5303 {
5304 SequentialSubTasksDone* pst = dng->from()->par_seq_tasks();
5305 assert(!pst->valid(), "Clobbering existing data?");
5306 size_t n_tasks = _survivor_chunk_index + 1;
5307 assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
5308 pst->set_par_threads(n_threads);
5309 pst->set_n_tasks((int)n_tasks);
5310 assert(pst->valid(), "Error");
5311 }
5312 }
5313
5314 // Parallel version of remark
5315 void CMSCollector::do_remark_parallel() {
5316 GenCollectedHeap* gch = GenCollectedHeap::heap();
5317 WorkGang* workers = gch->workers();
5318 assert(workers != NULL, "Need parallel worker threads.");
5319 int n_workers = workers->total_workers();
5320 CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
5321 CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
5322
5323 CMSParRemarkTask tsk(this,
5324 cms_space, perm_space,
5325 n_workers, workers, task_queues());
5326
5327 // Set up for parallel process_strong_roots work.
5328 gch->set_par_threads(n_workers);
5329 gch->change_strong_roots_parity();
5330 // We won't be iterating over the cards in the card table updating
5331 // the younger_gen cards, so we shouldn't call the following, else
5332 // the verification code as well as subsequent younger_refs_iterate
5333 // code would get confused. XXX
5334 // gch->rem_set()->prepare_for_younger_refs_iterate(true); // parallel
5335
5336 // The young gen rescan work will not be done as part of
5337 // process_strong_roots (which currently doesn't know how to
5338 // parallelize such a scan), but rather will be broken up into
5339 // a set of parallel tasks (via the sampling that the [abortable]
5340 // preclean phase did of EdenSpace, plus the [two] tasks of
5341 // scanning the [two] survivor spaces). Further fine-grain
5342 // parallelization of the scanning of the survivor spaces
5343 // themselves, and of precleaning of the younger gen itself
5344 // is deferred to the future.
5345 initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
5346
5347 // The dirty card rescan work is broken up into a "sequence"
5348 // of parallel tasks (per constituent space) that are dynamically
5349 // claimed by the parallel threads.
5350 cms_space->initialize_sequential_subtasks_for_rescan(n_workers);
5351 perm_space->initialize_sequential_subtasks_for_rescan(n_workers);
5352
5353 // It turns out that even when we're using 1 thread, doing the work in a
5354 // separate thread causes wide variance in run times. We can't help this
5355 // in the multi-threaded case, but we special-case n=1 here to get
5356 // repeatable measurements of the 1-thread overhead of the parallel code.
5357 if (n_workers > 1) { 5358 // Make refs discovery MT-safe 5359 ReferenceProcessorMTMutator mt(ref_processor(), true); 5360 workers->run_task(&tsk); 5361 } else { 5362 tsk.work(0); 5363 } 5364 gch->set_par_threads(0); // 0 ==> non-parallel. 5365 // restore, single-threaded for now, any preserved marks 5366 // as a result of work_q overflow 5367 restore_preserved_marks_if_any(); 5368 } 5369 5370 // Non-parallel version of remark 5371 void CMSCollector::do_remark_non_parallel() { 5372 ResourceMark rm; 5373 HandleMark hm; 5374 GenCollectedHeap* gch = GenCollectedHeap::heap(); 5375 MarkRefsIntoAndScanClosure 5376 mrias_cl(_span, ref_processor(), &_markBitMap, &_modUnionTable, 5377 &_markStack, &_revisitStack, this, 5378 false /* should_yield */, false /* not precleaning */); 5379 MarkFromDirtyCardsClosure 5380 markFromDirtyCardsClosure(this, _span, 5381 NULL, // space is set further below 5382 &_markBitMap, &_markStack, &_revisitStack, 5383 &mrias_cl); 5384 { 5385 TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty); 5386 // Iterate over the dirty cards, marking them precleaned, and 5387 // setting the corresponding bits in the mod union table. 5388 { 5389 ModUnionClosure modUnionClosure(&_modUnionTable); 5390 _ct->ct_bs()->dirty_card_iterate( 5391 _cmsGen->used_region(), 5392 &modUnionClosure); 5393 _ct->ct_bs()->dirty_card_iterate( 5394 _permGen->used_region(), 5395 &modUnionClosure); 5396 } 5397 // Having transferred these marks into the modUnionTable, we just need 5398 // to rescan the marked objects on the dirty cards in the modUnionTable. 5399 // The initial marking may have been done during an asynchronous 5400 // collection so there may be dirty bits in the mod-union table. 5401 const int alignment = 5402 CardTableModRefBS::card_size * BitsPerWord; 5403 { 5404 // ... First handle dirty cards in CMS gen 5405 markFromDirtyCardsClosure.set_space(_cmsGen->cmsSpace()); 5406 MemRegion ur = _cmsGen->used_region(); 5407 HeapWord* lb = ur.start(); 5408 HeapWord* ub = (HeapWord*)round_to((intptr_t)ur.end(), alignment); 5409 MemRegion cms_span(lb, ub); 5410 _modUnionTable.dirty_range_iterate_clear(cms_span, 5411 &markFromDirtyCardsClosure); 5412 verify_work_stacks_empty(); 5413 if (PrintCMSStatistics != 0) { 5414 gclog_or_tty->print(" (re-scanned "SIZE_FORMAT" dirty cards in cms gen) ", 5415 markFromDirtyCardsClosure.num_dirty_cards()); 5416 } 5417 } 5418 { 5419 // .. and then repeat for dirty cards in perm gen 5420 markFromDirtyCardsClosure.set_space(_permGen->cmsSpace()); 5421 MemRegion ur = _permGen->used_region(); 5422 HeapWord* lb = ur.start(); 5423 HeapWord* ub = (HeapWord*)round_to((intptr_t)ur.end(), alignment); 5424 MemRegion perm_span(lb, ub); 5425 _modUnionTable.dirty_range_iterate_clear(perm_span, 5426 &markFromDirtyCardsClosure); 5427 verify_work_stacks_empty(); 5428 if (PrintCMSStatistics != 0) { 5429 gclog_or_tty->print(" (re-scanned "SIZE_FORMAT" dirty cards in perm gen) ", 5430 markFromDirtyCardsClosure.num_dirty_cards()); 5431 } 5432 } 5433 } 5434 if (VerifyDuringGC && 5435 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) { 5436 HandleMark hm; // Discard invalid handles created during verification 5437 Universe::verify(true); 5438 } 5439 { 5440 TraceTime t("root rescan", PrintGCDetails, false, gclog_or_tty); 5441 5442 verify_work_stacks_empty(); 5443 5444 gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel. 
5445 gch->gen_process_strong_roots(_cmsGen->level(), 5446 true, // younger gens as roots 5447 true, // collecting perm gen 5448 SharedHeap::ScanningOption(roots_scanning_options()), 5449 NULL, &mrias_cl); 5450 } 5451 verify_work_stacks_empty(); 5452 // Restore evacuated mark words, if any, used for overflow list links 5453 if (!CMSOverflowEarlyRestoration) { 5454 restore_preserved_marks_if_any(); 5455 } 5456 verify_overflow_empty(); 5457 } 5458 5459 //////////////////////////////////////////////////////// 5460 // Parallel Reference Processing Task Proxy Class 5461 //////////////////////////////////////////////////////// 5462 class CMSRefProcTaskProxy: public AbstractGangTask { 5463 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 5464 CMSCollector* _collector; 5465 CMSBitMap* _mark_bit_map; 5466 const MemRegion _span; 5467 OopTaskQueueSet* _task_queues; 5468 ParallelTaskTerminator _term; 5469 ProcessTask& _task; 5470 5471 public: 5472 CMSRefProcTaskProxy(ProcessTask& task, 5473 CMSCollector* collector, 5474 const MemRegion& span, 5475 CMSBitMap* mark_bit_map, 5476 int total_workers, 5477 OopTaskQueueSet* task_queues): 5478 AbstractGangTask("Process referents by policy in parallel"), 5479 _task(task), 5480 _collector(collector), _span(span), _mark_bit_map(mark_bit_map), 5481 _task_queues(task_queues), 5482 _term(total_workers, task_queues) 5483 { 5484 assert(_collector->_span.equals(_span) && !_span.is_empty(), 5485 "Inconsistency in _span"); 5486 } 5487 OopTaskQueueSet* task_queues() { return _task_queues; } 5488 5489 OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); } 5490 5491 ParallelTaskTerminator* terminator() { return &_term; } 5492 5493 void do_work_steal(int i, 5494 CMSParDrainMarkingStackClosure* drain, 5495 CMSParKeepAliveClosure* keep_alive, 5496 int* seed); 5497 5498 virtual void work(int i); 5499 }; 5500 5501 void CMSRefProcTaskProxy::work(int i) { 5502 assert(_collector->_span.equals(_span), "Inconsistency in _span"); 5503 CMSParKeepAliveClosure par_keep_alive(_collector, _span, 5504 _mark_bit_map, work_queue(i)); 5505 CMSParDrainMarkingStackClosure par_drain_stack(_collector, _span, 5506 _mark_bit_map, work_queue(i)); 5507 CMSIsAliveClosure is_alive_closure(_span, _mark_bit_map); 5508 _task.work(i, is_alive_closure, par_keep_alive, par_drain_stack); 5509 if (_task.marks_oops_alive()) { 5510 do_work_steal(i, &par_drain_stack, &par_keep_alive, 5511 _collector->hash_seed(i)); 5512 } 5513 assert(work_queue(i)->size() == 0, "work_queue should be empty"); 5514 assert(_collector->_overflow_list == NULL, "non-empty _overflow_list"); 5515 } 5516 5517 class CMSRefEnqueueTaskProxy: public AbstractGangTask { 5518 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 5519 EnqueueTask& _task; 5520 5521 public: 5522 CMSRefEnqueueTaskProxy(EnqueueTask& task) 5523 : AbstractGangTask("Enqueue reference objects in parallel"), 5524 _task(task) 5525 { } 5526 5527 virtual void work(int i) 5528 { 5529 _task.work(i); 5530 } 5531 }; 5532 5533 CMSParKeepAliveClosure::CMSParKeepAliveClosure(CMSCollector* collector, 5534 MemRegion span, CMSBitMap* bit_map, OopTaskQueue* work_queue): 5535 _collector(collector), 5536 _span(span), 5537 _bit_map(bit_map), 5538 _work_queue(work_queue), 5539 _mark_and_push(collector, span, bit_map, work_queue), 5540 _low_water_mark(MIN2((uint)(work_queue->max_elems()/4), 5541 (uint)(CMSWorkQueueDrainThreshold * ParallelGCThreads))) 5542 { } 5543 5544 // . see if we can share work_queues with ParNew? 
XXX 5545 void CMSRefProcTaskProxy::do_work_steal(int i, 5546 CMSParDrainMarkingStackClosure* drain, 5547 CMSParKeepAliveClosure* keep_alive, 5548 int* seed) { 5549 OopTaskQueue* work_q = work_queue(i); 5550 NOT_PRODUCT(int num_steals = 0;) 5551 oop obj_to_scan; 5552 size_t num_from_overflow_list = 5553 MIN2((size_t)work_q->max_elems()/4, 5554 (size_t)ParGCDesiredObjsFromOverflowList); 5555 5556 while (true) { 5557 // Completely finish any left over work from (an) earlier round(s) 5558 drain->trim_queue(0); 5559 // Now check if there's any work in the overflow list 5560 if (_collector->par_take_from_overflow_list(num_from_overflow_list, 5561 work_q)) { 5562 // Found something in global overflow list; 5563 // not yet ready to go stealing work from others. 5564 // We'd like to assert(work_q->size() != 0, ...) 5565 // because we just took work from the overflow list, 5566 // but of course we can't, since all of that might have 5567 // been already stolen from us. 5568 continue; 5569 } 5570 // Verify that we have no work before we resort to stealing 5571 assert(work_q->size() == 0, "Have work, shouldn't steal"); 5572 // Try to steal from other queues that have work 5573 if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) { 5574 NOT_PRODUCT(num_steals++;) 5575 assert(obj_to_scan->is_oop(), "Oops, not an oop!"); 5576 assert(_mark_bit_map->isMarked((HeapWord*)obj_to_scan), "Stole an unmarked oop?"); 5577 // Do scanning work 5578 obj_to_scan->oop_iterate(keep_alive); 5579 // Loop around, finish this work, and try to steal some more 5580 } else if (terminator()->offer_termination()) { 5581 break; // nirvana from the infinite cycle 5582 } 5583 } 5584 NOT_PRODUCT( 5585 if (PrintCMSStatistics != 0) { 5586 gclog_or_tty->print("\n\t(%d: stole %d oops)", i, num_steals); 5587 } 5588 ) 5589 } 5590 5591 void CMSRefProcTaskExecutor::execute(ProcessTask& task) 5592 { 5593 GenCollectedHeap* gch = GenCollectedHeap::heap(); 5594 WorkGang* workers = gch->workers(); 5595 assert(workers != NULL, "Need parallel worker threads."); 5596 int n_workers = workers->total_workers(); 5597 CMSRefProcTaskProxy rp_task(task, &_collector, 5598 _collector.ref_processor()->span(), 5599 _collector.markBitMap(), 5600 n_workers, _collector.task_queues()); 5601 workers->run_task(&rp_task); 5602 } 5603 5604 void CMSRefProcTaskExecutor::execute(EnqueueTask& task) 5605 { 5606 5607 GenCollectedHeap* gch = GenCollectedHeap::heap(); 5608 WorkGang* workers = gch->workers(); 5609 assert(workers != NULL, "Need parallel worker threads."); 5610 CMSRefEnqueueTaskProxy enq_task(task); 5611 workers->run_task(&enq_task); 5612 } 5613 5614 void CMSCollector::refProcessingWork(bool asynch, bool clear_all_soft_refs) { 5615 5616 ResourceMark rm; 5617 HandleMark hm; 5618 ReferencePolicy* soft_ref_policy; 5619 5620 assert(!ref_processor()->enqueuing_is_done(), "Enqueuing should not be complete"); 5621 // Process weak references. 
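// (AlwaysClearPolicy discards every softly reachable referent. The two LRU
// policies clear one only if it has gone unused for "long enough", where
// "long enough" scales with SoftRefLRUPolicyMSPerMB and the available heap
// headroom; the Max and Current variants differ only in how that headroom
// is estimated. As a rough, purely illustrative figure: at 1000 ms per MB
// and roughly 512 MB of headroom, a soft reference untouched for less than
// about 8.5 minutes would be kept.)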
5622 if (clear_all_soft_refs) { 5623 soft_ref_policy = new AlwaysClearPolicy(); 5624 } else { 5625 #ifdef COMPILER2 5626 soft_ref_policy = new LRUMaxHeapPolicy(); 5627 #else 5628 soft_ref_policy = new LRUCurrentHeapPolicy(); 5629 #endif // COMPILER2 5630 } 5631 verify_work_stacks_empty(); 5632 5633 ReferenceProcessor* rp = ref_processor(); 5634 assert(rp->span().equals(_span), "Spans should be equal"); 5635 CMSKeepAliveClosure cmsKeepAliveClosure(this, _span, &_markBitMap, 5636 &_markStack); 5637 CMSDrainMarkingStackClosure cmsDrainMarkingStackClosure(this, 5638 _span, &_markBitMap, &_markStack, 5639 &cmsKeepAliveClosure); 5640 { 5641 TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty); 5642 if (rp->processing_is_mt()) { 5643 CMSRefProcTaskExecutor task_executor(*this); 5644 rp->process_discovered_references(soft_ref_policy, 5645 &_is_alive_closure, 5646 &cmsKeepAliveClosure, 5647 &cmsDrainMarkingStackClosure, 5648 &task_executor); 5649 } else { 5650 rp->process_discovered_references(soft_ref_policy, 5651 &_is_alive_closure, 5652 &cmsKeepAliveClosure, 5653 &cmsDrainMarkingStackClosure, 5654 NULL); 5655 } 5656 verify_work_stacks_empty(); 5657 } 5658 5659 if (cms_should_unload_classes()) { 5660 { 5661 TraceTime t("class unloading", PrintGCDetails, false, gclog_or_tty); 5662 5663 // Follow SystemDictionary roots and unload classes 5664 bool purged_class = SystemDictionary::do_unloading(&_is_alive_closure); 5665 5666 // Follow CodeCache roots and unload any methods marked for unloading 5667 CodeCache::do_unloading(&_is_alive_closure, 5668 &cmsKeepAliveClosure, 5669 purged_class); 5670 5671 cmsDrainMarkingStackClosure.do_void(); 5672 verify_work_stacks_empty(); 5673 5674 // Update subklass/sibling/implementor links in KlassKlass descendants 5675 assert(!_revisitStack.isEmpty(), "revisit stack should not be empty"); 5676 oop k; 5677 while ((k = _revisitStack.pop()) != NULL) { 5678 ((Klass*)(oopDesc*)k)->follow_weak_klass_links( 5679 &_is_alive_closure, 5680 &cmsKeepAliveClosure); 5681 } 5682 assert(!ClassUnloading || 5683 (_markStack.isEmpty() && overflow_list_is_empty()), 5684 "Should not have found new reachable objects"); 5685 assert(_revisitStack.isEmpty(), "revisit stack should have been drained"); 5686 cmsDrainMarkingStackClosure.do_void(); 5687 verify_work_stacks_empty(); 5688 } 5689 5690 { 5691 TraceTime t("scrub symbol & string tables", PrintGCDetails, false, gclog_or_tty); 5692 // Now clean up stale oops in SymbolTable and StringTable 5693 SymbolTable::unlink(&_is_alive_closure); 5694 StringTable::unlink(&_is_alive_closure); 5695 } 5696 } 5697 5698 verify_work_stacks_empty(); 5699 // Restore any preserved marks as a result of mark stack or 5700 // work queue overflow 5701 restore_preserved_marks_if_any(); // done single-threaded for now 5702 5703 rp->set_enqueuing_is_done(true); 5704 if (rp->processing_is_mt()) { 5705 CMSRefProcTaskExecutor task_executor(*this); 5706 rp->enqueue_discovered_references(&task_executor); 5707 } else { 5708 rp->enqueue_discovered_references(NULL); 5709 } 5710 rp->verify_no_references_recorded(); 5711 assert(!rp->discovery_enabled(), "should have been disabled"); 5712 5713 // JVMTI object tagging is based on JNI weak refs. If any of these 5714 // refs were cleared then JVMTI needs to update its maps and 5715 // maybe post ObjectFrees to agents. 
5716 JvmtiExport::cms_ref_processing_epilogue(); 5717 } 5718 5719 #ifndef PRODUCT 5720 void CMSCollector::check_correct_thread_executing() { 5721 Thread* t = Thread::current(); 5722 // Only the VM thread or the CMS thread should be here. 5723 assert(t->is_ConcurrentGC_thread() || t->is_VM_thread(), 5724 "Unexpected thread type"); 5725 // If this is the vm thread, the foreground process 5726 // should not be waiting. Note that _foregroundGCIsActive is 5727 // true while the foreground collector is waiting. 5728 if (_foregroundGCShouldWait) { 5729 // We cannot be the VM thread 5730 assert(t->is_ConcurrentGC_thread(), 5731 "Should be CMS thread"); 5732 } else { 5733 // We can be the CMS thread only if we are in a stop-world 5734 // phase of CMS collection. 5735 if (t->is_ConcurrentGC_thread()) { 5736 assert(_collectorState == InitialMarking || 5737 _collectorState == FinalMarking, 5738 "Should be a stop-world phase"); 5739 // The CMS thread should be holding the CMS_token. 5740 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 5741 "Potential interference with concurrently " 5742 "executing VM thread"); 5743 } 5744 } 5745 } 5746 #endif 5747 5748 void CMSCollector::sweep(bool asynch) { 5749 assert(_collectorState == Sweeping, "just checking"); 5750 check_correct_thread_executing(); 5751 verify_work_stacks_empty(); 5752 verify_overflow_empty(); 5753 incrementSweepCount(); 5754 _sweep_timer.stop(); 5755 _sweep_estimate.sample(_sweep_timer.seconds()); 5756 size_policy()->avg_cms_free_at_sweep()->sample(_cmsGen->free()); 5757 5758 // PermGen verification support: If perm gen sweeping is disabled in 5759 // this cycle, we preserve the perm gen object "deadness" information 5760 // in the perm_gen_verify_bit_map. In order to do that we traverse 5761 // all blocks in perm gen and mark all dead objects. 5762 if (verifying() && !cms_should_unload_classes()) { 5763 CMSTokenSyncWithLocks ts(true, _permGen->freelistLock(), 5764 bitMapLock()); 5765 assert(perm_gen_verify_bit_map()->sizeInBits() != 0, 5766 "Should have already been allocated"); 5767 MarkDeadObjectsClosure mdo(this, _permGen->cmsSpace(), 5768 markBitMap(), perm_gen_verify_bit_map()); 5769 _permGen->cmsSpace()->blk_iterate(&mdo); 5770 } 5771 5772 if (asynch) { 5773 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); 5774 CMSPhaseAccounting pa(this, "sweep", !PrintGCDetails); 5775 // First sweep the old gen then the perm gen 5776 { 5777 CMSTokenSyncWithLocks ts(true, _cmsGen->freelistLock(), 5778 bitMapLock()); 5779 sweepWork(_cmsGen, asynch); 5780 } 5781 5782 // Now repeat for perm gen 5783 if (cms_should_unload_classes()) { 5784 CMSTokenSyncWithLocks ts(true, _permGen->freelistLock(), 5785 bitMapLock()); 5786 sweepWork(_permGen, asynch); 5787 } 5788 5789 // Update Universe::_heap_*_at_gc figures. 5790 // We need all the free list locks to make the abstract state 5791 // transition from Sweeping to Resetting. See detailed note 5792 // further below. 5793 { 5794 CMSTokenSyncWithLocks ts(true, _cmsGen->freelistLock(), 5795 _permGen->freelistLock()); 5796 // Update heap occupancy information which is used as 5797 // input to soft ref clearing policy at the next gc. 5798 Universe::update_heap_info_at_gc(); 5799 _collectorState = Resizing; 5800 } 5801 } else { 5802 // already have needed locks 5803 sweepWork(_cmsGen, asynch); 5804 5805 if (cms_should_unload_classes()) { 5806 sweepWork(_permGen, asynch); 5807 } 5808 // Update heap occupancy information which is used as 5809 // input to soft ref clearing policy at the next gc. 
5810 Universe::update_heap_info_at_gc(); 5811 _collectorState = Resizing; 5812 } 5813 verify_work_stacks_empty(); 5814 verify_overflow_empty(); 5815 5816 _sweep_timer.reset(); 5817 _sweep_timer.start(); 5818 5819 update_time_of_last_gc(os::javaTimeMillis()); 5820 5821 // NOTE on abstract state transitions: 5822 // Mutators allocate-live and/or mark the mod-union table dirty 5823 // based on the state of the collection. The former is done in 5824 // the interval [Marking, Sweeping] and the latter in the interval 5825 // [Marking, Sweeping). Thus the transitions into the Marking state 5826 // and out of the Sweeping state must be synchronously visible 5827 // globally to the mutators. 5828 // The transition into the Marking state happens with the world 5829 // stopped so the mutators will globally see it. Sweeping is 5830 // done asynchronously by the background collector so the transition 5831 // from the Sweeping state to the Resizing state must be done 5832 // under the freelistLock (as is the check for whether to 5833 // allocate-live and whether to dirty the mod-union table). 5834 assert(_collectorState == Resizing, "Change of collector state to" 5835 " Resizing must be done under the freelistLocks (plural)"); 5836 5837 // Now that sweeping has been completed, if the GCH's 5838 // incremental_collection_will_fail flag is set, clear it, 5839 // thus inviting a younger gen collection to promote into 5840 // this generation. If such a promotion may still fail, 5841 // the flag will be set again when a young collection is 5842 // attempted. 5843 // I think the incremental_collection_will_fail flag's use 5844 // is specific to a 2 generation collection policy, so i'll 5845 // assert that that's the configuration we are operating within. 5846 // The use of the flag can and should be generalized appropriately 5847 // in the future to deal with a general n-generation system. 5848 5849 GenCollectedHeap* gch = GenCollectedHeap::heap(); 5850 assert(gch->collector_policy()->is_two_generation_policy(), 5851 "Resetting of incremental_collection_will_fail flag" 5852 " may be incorrect otherwise"); 5853 gch->clear_incremental_collection_will_fail(); 5854 gch->update_full_collections_completed(_collection_count_start); 5855 } 5856 5857 // FIX ME!!! Looks like this belongs in CFLSpace, with 5858 // CMSGen merely delegating to it. 5859 void ConcurrentMarkSweepGeneration::setNearLargestChunk() { 5860 double nearLargestPercent = 0.999; 5861 HeapWord* minAddr = _cmsSpace->bottom(); 5862 HeapWord* largestAddr = 5863 (HeapWord*) _cmsSpace->dictionary()->findLargestDict(); 5864 if (largestAddr == 0) { 5865 // The dictionary appears to be empty. In this case 5866 // try to coalesce at the end of the heap. 5867 largestAddr = _cmsSpace->end(); 5868 } 5869 size_t largestOffset = pointer_delta(largestAddr, minAddr); 5870 size_t nearLargestOffset = 5871 (size_t)((double)largestOffset * nearLargestPercent) - MinChunkSize; 5872 _cmsSpace->set_nearLargestChunk(minAddr + nearLargestOffset); 5873 } 5874 5875 bool ConcurrentMarkSweepGeneration::isNearLargestChunk(HeapWord* addr) { 5876 return addr >= _cmsSpace->nearLargestChunk(); 5877 } 5878 5879 FreeChunk* ConcurrentMarkSweepGeneration::find_chunk_at_end() { 5880 return _cmsSpace->find_chunk_at_end(); 5881 } 5882 5883 void ConcurrentMarkSweepGeneration::update_gc_stats(int current_level, 5884 bool full) { 5885 // The next lower level has been collected. Gather any statistics 5886 // that are of interest at this point. 
5887 if (!full && (current_level + 1) == level()) { 5888 // Gather statistics on the young generation collection. 5889 collector()->stats().record_gc0_end(used()); 5890 } 5891 } 5892 5893 CMSAdaptiveSizePolicy* ConcurrentMarkSweepGeneration::size_policy() { 5894 GenCollectedHeap* gch = GenCollectedHeap::heap(); 5895 assert(gch->kind() == CollectedHeap::GenCollectedHeap, 5896 "Wrong type of heap"); 5897 CMSAdaptiveSizePolicy* sp = (CMSAdaptiveSizePolicy*) 5898 gch->gen_policy()->size_policy(); 5899 assert(sp->is_gc_cms_adaptive_size_policy(), 5900 "Wrong type of size policy"); 5901 return sp; 5902 } 5903 5904 void ConcurrentMarkSweepGeneration::rotate_debug_collection_type() { 5905 if (PrintGCDetails && Verbose) { 5906 gclog_or_tty->print("Rotate from %d ", _debug_collection_type); 5907 } 5908 _debug_collection_type = (CollectionTypes) (_debug_collection_type + 1); 5909 _debug_collection_type = 5910 (CollectionTypes) (_debug_collection_type % Unknown_collection_type); 5911 if (PrintGCDetails && Verbose) { 5912 gclog_or_tty->print_cr("to %d ", _debug_collection_type); 5913 } 5914 } 5915 5916 void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* gen, 5917 bool asynch) { 5918 // We iterate over the space(s) underlying this generation, 5919 // checking the mark bit map to see if the bits corresponding 5920 // to specific blocks are marked or not. Blocks that are 5921 // marked are live and are not swept up. All remaining blocks 5922 // are swept up, with coalescing on-the-fly as we sweep up 5923 // contiguous free and/or garbage blocks: 5924 // We need to ensure that the sweeper synchronizes with allocators 5925 // and stop-the-world collectors. In particular, the following 5926 // locks are used: 5927 // . CMS token: if this is held, a stop-the-world collection cannot occur 5928 // . freelistLock: if this is held no allocation can occur from this 5929 // generation by another thread 5930 // . bitMapLock: if this is held, no other thread can access or update 5931 // the marking bit map 5932 5933 // Note that we need to hold the freelistLock if we use 5934 // block iterate below; else the iterator might go awry if 5935 // a mutator (or promotion) causes block contents to change 5936 // (for instance if the allocator divvies up a block). 5937 // If we hold the free list lock, for all practical purposes 5938 // young generation GC's can't occur (they'll usually need to 5939 // promote), so we might as well prevent all young generation 5940 // GC's while we do a sweeping step. For the same reason, we might 5941 // as well take the bit map lock for the entire duration. 5942 5943 // check that we hold the requisite locks 5944 assert(have_cms_token(), "Should hold cms token"); 5945 assert( (asynch && ConcurrentMarkSweepThread::cms_thread_has_cms_token()) 5946 || (!asynch && ConcurrentMarkSweepThread::vm_thread_has_cms_token()), 5947 "Should possess CMS token to sweep"); 5948 assert_lock_strong(gen->freelistLock()); 5949 assert_lock_strong(bitMapLock()); 5950 5951 assert(!_sweep_timer.is_active(), "Was switched off in an outer context"); 5952 gen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()), 5953 _sweep_estimate.padded_average()); 5954 gen->setNearLargestChunk(); 5955 5956 { 5957 SweepClosure sweepClosure(this, gen, &_markBitMap, 5958 CMSYield && asynch); 5959 gen->cmsSpace()->blk_iterate_careful(&sweepClosure); 5960 // We need to free-up/coalesce garbage/blocks from a 5961 // co-terminal free run.
This is done in the SweepClosure 5962 // destructor; so, do not remove this scope, else the 5963 // end-of-sweep-census below will be off by a little bit. 5964 } 5965 gen->cmsSpace()->sweep_completed(); 5966 gen->cmsSpace()->endSweepFLCensus(sweepCount()); 5967 } 5968 5969 // Reset CMS data structures (for now just the marking bit map) 5970 // preparatory for the next cycle. 5971 void CMSCollector::reset(bool asynch) { 5972 GenCollectedHeap* gch = GenCollectedHeap::heap(); 5973 CMSAdaptiveSizePolicy* sp = size_policy(); 5974 AdaptiveSizePolicyOutput(sp, gch->total_collections()); 5975 if (asynch) { 5976 CMSTokenSyncWithLocks ts(true, bitMapLock()); 5977 5978 // If the state is not "Resetting", the foreground thread 5979 // has done a collection and the resetting. 5980 if (_collectorState != Resetting) { 5981 assert(_collectorState == Idling, "The state should only change" 5982 " because the foreground collector has finished the collection"); 5983 return; 5984 } 5985 5986 // Clear the mark bitmap (no grey objects to start with) 5987 // for the next cycle. 5988 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); 5989 CMSPhaseAccounting cmspa(this, "reset", !PrintGCDetails); 5990 5991 HeapWord* curAddr = _markBitMap.startWord(); 5992 while (curAddr < _markBitMap.endWord()) { 5993 size_t remaining = pointer_delta(_markBitMap.endWord(), curAddr); 5994 MemRegion chunk(curAddr, MIN2(CMSBitMapYieldQuantum, remaining)); 5995 _markBitMap.clear_large_range(chunk); 5996 if (ConcurrentMarkSweepThread::should_yield() && 5997 !foregroundGCIsActive() && 5998 CMSYield) { 5999 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 6000 "CMS thread should hold CMS token"); 6001 assert_lock_strong(bitMapLock()); 6002 bitMapLock()->unlock(); 6003 ConcurrentMarkSweepThread::desynchronize(true); 6004 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6005 stopTimer(); 6006 if (PrintCMSStatistics != 0) { 6007 incrementYields(); 6008 } 6009 icms_wait(); 6010 6011 // See the comment in coordinator_yield() 6012 for (unsigned i = 0; i < CMSYieldSleepCount && 6013 ConcurrentMarkSweepThread::should_yield() && 6014 !CMSCollector::foregroundGCIsActive(); ++i) { 6015 os::sleep(Thread::current(), 1, false); 6016 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6017 } 6018 6019 ConcurrentMarkSweepThread::synchronize(true); 6020 bitMapLock()->lock_without_safepoint_check(); 6021 startTimer(); 6022 } 6023 curAddr = chunk.end(); 6024 } 6025 _collectorState = Idling; 6026 } else { 6027 // already have the lock 6028 assert(_collectorState == Resetting, "just checking"); 6029 assert_lock_strong(bitMapLock()); 6030 _markBitMap.clear_all(); 6031 _collectorState = Idling; 6032 } 6033 6034 // Stop incremental mode after a cycle completes, so that any future cycles 6035 // are triggered by allocation. 
6036 stop_icms(); 6037 6038 NOT_PRODUCT( 6039 if (RotateCMSCollectionTypes) { 6040 _cmsGen->rotate_debug_collection_type(); 6041 } 6042 ) 6043 } 6044 6045 void CMSCollector::do_CMS_operation(CMS_op_type op) { 6046 gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); 6047 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); 6048 TraceTime t("GC", PrintGC, !PrintGCDetails, gclog_or_tty); 6049 TraceCollectorStats tcs(counters()); 6050 6051 switch (op) { 6052 case CMS_op_checkpointRootsInitial: { 6053 checkpointRootsInitial(true); // asynch 6054 if (PrintGC) { 6055 _cmsGen->printOccupancy("initial-mark"); 6056 } 6057 break; 6058 } 6059 case CMS_op_checkpointRootsFinal: { 6060 checkpointRootsFinal(true, // asynch 6061 false, // !clear_all_soft_refs 6062 false); // !init_mark_was_synchronous 6063 if (PrintGC) { 6064 _cmsGen->printOccupancy("remark"); 6065 } 6066 break; 6067 } 6068 default: 6069 fatal("No such CMS_op"); 6070 } 6071 } 6072 6073 #ifndef PRODUCT 6074 size_t const CMSCollector::skip_header_HeapWords() { 6075 return FreeChunk::header_size(); 6076 } 6077 6078 // Try and collect here conditions that should hold when 6079 // CMS thread is exiting. The idea is that the foreground GC 6080 // thread should not be blocked if it wants to terminate 6081 // the CMS thread and yet continue to run the VM for a while 6082 // after that. 6083 void CMSCollector::verify_ok_to_terminate() const { 6084 assert(Thread::current()->is_ConcurrentGC_thread(), 6085 "should be called by CMS thread"); 6086 assert(!_foregroundGCShouldWait, "should be false"); 6087 // We could check here that all the various low-level locks 6088 // are not held by the CMS thread, but that is overkill; see 6089 // also CMSThread::verify_ok_to_terminate() where the CGC_lock 6090 // is checked. 6091 } 6092 #endif 6093 6094 size_t CMSCollector::block_size_using_printezis_bits(HeapWord* addr) const { 6095 assert(_markBitMap.isMarked(addr) && _markBitMap.isMarked(addr + 1), 6096 "missing Printezis mark?"); 6097 HeapWord* nextOneAddr = _markBitMap.getNextMarkedWordAddress(addr + 2); 6098 size_t size = pointer_delta(nextOneAddr + 1, addr); 6099 assert(size == CompactibleFreeListSpace::adjustObjectSize(size), 6100 "alignment problem"); 6101 assert(size >= 3, "Necessary for Printezis marks to work"); 6102 return size; 6103 } 6104 6105 // A variant of the above (block_size_using_printezis_bits()) except 6106 // that we return 0 if the P-bits are not yet set. 
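// Note: as the assertions here and in block_size_using_printezis_bits()
// above suggest, the "Printezis marks" convention appears to be that for an
// allocated-but-uninitialized block starting at addr, the bits for addr and
// addr+1 are both set and the bit for the last word of the block is also set;
// the block size is then recovered as pointer_delta(nextOneAddr + 1, addr),
// where nextOneAddr is the first marked word at or after addr + 2. This is
// why such blocks must be at least 3 words long.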
6107 size_t CMSCollector::block_size_if_printezis_bits(HeapWord* addr) const { 6108 if (_markBitMap.isMarked(addr)) { 6109 assert(_markBitMap.isMarked(addr + 1), "Missing Printezis bit?"); 6110 HeapWord* nextOneAddr = _markBitMap.getNextMarkedWordAddress(addr + 2); 6111 size_t size = pointer_delta(nextOneAddr + 1, addr); 6112 assert(size == CompactibleFreeListSpace::adjustObjectSize(size), 6113 "alignment problem"); 6114 assert(size >= 3, "Necessary for Printezis marks to work"); 6115 return size; 6116 } else { 6117 assert(!_markBitMap.isMarked(addr + 1), "Bit map inconsistency?"); 6118 return 0; 6119 } 6120 } 6121 6122 HeapWord* CMSCollector::next_card_start_after_block(HeapWord* addr) const { 6123 size_t sz = 0; 6124 oop p = (oop)addr; 6125 if (p->klass() != NULL && p->is_parsable()) { 6126 sz = CompactibleFreeListSpace::adjustObjectSize(p->size()); 6127 } else { 6128 sz = block_size_using_printezis_bits(addr); 6129 } 6130 assert(sz > 0, "size must be nonzero"); 6131 HeapWord* next_block = addr + sz; 6132 HeapWord* next_card = (HeapWord*)round_to((uintptr_t)next_block, 6133 CardTableModRefBS::card_size); 6134 assert(round_down((uintptr_t)addr, CardTableModRefBS::card_size) < 6135 round_down((uintptr_t)next_card, CardTableModRefBS::card_size), 6136 "must be different cards"); 6137 return next_card; 6138 } 6139 6140 6141 // CMS Bit Map Wrapper ///////////////////////////////////////// 6142 6143 // Construct a CMS bit map infrastructure, but don't create the 6144 // bit vector itself. That is done by a separate call to CMSBitMap::allocate() 6145 // further below. 6146 CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name): 6147 _bm(NULL,0), 6148 _shifter(shifter), 6149 _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL) 6150 { 6151 _bmStartWord = 0; 6152 _bmWordSize = 0; 6153 } 6154 6155 bool CMSBitMap::allocate(MemRegion mr) { 6156 _bmStartWord = mr.start(); 6157 _bmWordSize = mr.word_size(); 6158 ReservedSpace brs(ReservedSpace::allocation_align_size_up( 6159 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); 6160 if (!brs.is_reserved()) { 6161 warning("CMS bit map allocation failure"); 6162 return false; 6163 } 6164 // For now we'll just commit all of the bit map up front. 6165 // Later on we'll try to be more parsimonious with swap. 6166 if (!_virtual_space.initialize(brs, brs.size())) { 6167 warning("CMS bit map backing store failure"); 6168 return false; 6169 } 6170 assert(_virtual_space.committed_size() == brs.size(), 6171 "didn't reserve backing store for all of CMS bit map?"); 6172 _bm.set_map((uintptr_t*)_virtual_space.low()); 6173 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= 6174 _bmWordSize, "inconsistency in bit map sizing"); 6175 _bm.set_size(_bmWordSize >> _shifter); 6176 6177 // bm.clear(); // can we rely on getting zero'd memory?
verify below 6178 assert(isAllClear(), 6179 "Expected zero'd memory from ReservedSpace constructor"); 6180 assert(_bm.size() == heapWordDiffToOffsetDiff(sizeInWords()), 6181 "consistency check"); 6182 return true; 6183 } 6184 6185 void CMSBitMap::dirty_range_iterate_clear(MemRegion mr, MemRegionClosure* cl) { 6186 HeapWord *next_addr, *end_addr, *last_addr; 6187 assert_locked(); 6188 assert(covers(mr), "out-of-range error"); 6189 // XXX assert that start and end are appropriately aligned 6190 for (next_addr = mr.start(), end_addr = mr.end(); 6191 next_addr < end_addr; next_addr = last_addr) { 6192 MemRegion dirty_region = getAndClearMarkedRegion(next_addr, end_addr); 6193 last_addr = dirty_region.end(); 6194 if (!dirty_region.is_empty()) { 6195 cl->do_MemRegion(dirty_region); 6196 } else { 6197 assert(last_addr == end_addr, "program logic"); 6198 return; 6199 } 6200 } 6201 } 6202 6203 #ifndef PRODUCT 6204 void CMSBitMap::assert_locked() const { 6205 CMSLockVerifier::assert_locked(lock()); 6206 } 6207 6208 bool CMSBitMap::covers(MemRegion mr) const { 6209 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 6210 assert((size_t)_bm.size() == (_bmWordSize >> _shifter), 6211 "size inconsistency"); 6212 return (mr.start() >= _bmStartWord) && 6213 (mr.end() <= endWord()); 6214 } 6215 6216 bool CMSBitMap::covers(HeapWord* start, size_t size) const { 6217 return (start >= _bmStartWord && (start + size) <= endWord()); 6218 } 6219 6220 void CMSBitMap::verifyNoOneBitsInRange(HeapWord* left, HeapWord* right) { 6221 // verify that there are no 1 bits in the interval [left, right) 6222 FalseBitMapClosure falseBitMapClosure; 6223 iterate(&falseBitMapClosure, left, right); 6224 } 6225 6226 void CMSBitMap::region_invariant(MemRegion mr) 6227 { 6228 assert_locked(); 6229 // mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 6230 assert(!mr.is_empty(), "unexpected empty region"); 6231 assert(covers(mr), "mr should be covered by bit map"); 6232 // convert address range into offset range 6233 size_t start_ofs = heapWordToOffset(mr.start()); 6234 // Make sure that end() is appropriately aligned 6235 assert(mr.end() == (HeapWord*)round_to((intptr_t)mr.end(), 6236 (1 << (_shifter+LogHeapWordSize))), 6237 "Misaligned mr.end()"); 6238 size_t end_ofs = heapWordToOffset(mr.end()); 6239 assert(end_ofs > start_ofs, "Should mark at least one bit"); 6240 } 6241 6242 #endif 6243 6244 bool CMSMarkStack::allocate(size_t size) { 6245 // allocate a stack of the requisite depth 6246 ReservedSpace rs(ReservedSpace::allocation_align_size_up( 6247 size * sizeof(oop))); 6248 if (!rs.is_reserved()) { 6249 warning("CMSMarkStack allocation failure"); 6250 return false; 6251 } 6252 if (!_virtual_space.initialize(rs, rs.size())) { 6253 warning("CMSMarkStack backing store failure"); 6254 return false; 6255 } 6256 assert(_virtual_space.committed_size() == rs.size(), 6257 "didn't reserve backing store for all of CMS stack?"); 6258 _base = (oop*)(_virtual_space.low()); 6259 _index = 0; 6260 _capacity = size; 6261 NOT_PRODUCT(_max_depth = 0); 6262 return true; 6263 } 6264 6265 // XXX FIX ME !!! In the MT case we come in here holding a 6266 // leaf lock. For printing we need to take a further lock 6267 // which has lower rank. We need to recalibrate the two 6268 // lock-ranks involved in order to be able to print the 6269 // messages below. (Or defer the printing to the caller. 6270 // For now we take the expedient path of just disabling the 6271 // messages for the problematic case.)
6272 void CMSMarkStack::expand() { 6273 assert(_capacity <= CMSMarkStackSizeMax, "stack bigger than permitted"); 6274 if (_capacity == CMSMarkStackSizeMax) { 6275 if (_hit_limit++ == 0 && !CMSConcurrentMTEnabled && PrintGCDetails) { 6276 // We print a warning message only once per CMS cycle. 6277 gclog_or_tty->print_cr(" (benign) Hit CMSMarkStack max size limit"); 6278 } 6279 return; 6280 } 6281 // Double capacity if possible 6282 size_t new_capacity = MIN2(_capacity*2, CMSMarkStackSizeMax); 6283 // Do not give up existing stack until we have managed to 6284 // get the double capacity that we desired. 6285 ReservedSpace rs(ReservedSpace::allocation_align_size_up( 6286 new_capacity * sizeof(oop))); 6287 if (rs.is_reserved()) { 6288 // Release the backing store associated with old stack 6289 _virtual_space.release(); 6290 // Reinitialize virtual space for new stack 6291 if (!_virtual_space.initialize(rs, rs.size())) { 6292 fatal("Not enough swap for expanded marking stack"); 6293 } 6294 _base = (oop*)(_virtual_space.low()); 6295 _index = 0; 6296 _capacity = new_capacity; 6297 } else if (_failed_double++ == 0 && !CMSConcurrentMTEnabled && PrintGCDetails) { 6298 // Failed to double capacity, continue; 6299 // we print a detail message only once per CMS cycle. 6300 gclog_or_tty->print(" (benign) Failed to expand marking stack from "SIZE_FORMAT"K to " 6301 SIZE_FORMAT"K", 6302 _capacity / K, new_capacity / K); 6303 } 6304 } 6305 6306 6307 // Closures 6308 // XXX: there seems to be a lot of code duplication here; 6309 // should refactor and consolidate common code. 6310 6311 // This closure is used to mark refs into the CMS generation in 6312 // the CMS bit map. Called at the first checkpoint. This closure 6313 // assumes that we do not need to re-mark dirty cards; if the CMS 6314 // generation on which this is used is not an oldest (modulo perm gen) 6315 // generation then this will lose younger_gen cards! 6316 6317 MarkRefsIntoClosure::MarkRefsIntoClosure( 6318 MemRegion span, CMSBitMap* bitMap, bool should_do_nmethods): 6319 _span(span), 6320 _bitMap(bitMap), 6321 _should_do_nmethods(should_do_nmethods) 6322 { 6323 assert(_ref_processor == NULL, "deliberately left NULL"); 6324 assert(_bitMap->covers(_span), "_bitMap/_span mismatch"); 6325 } 6326 6327 void MarkRefsIntoClosure::do_oop(oop* p) { 6328 // if p points into _span, then mark corresponding bit in _markBitMap 6329 oop thisOop = *p; 6330 if (thisOop != NULL) { 6331 assert(thisOop->is_oop(), "expected an oop"); 6332 HeapWord* addr = (HeapWord*)thisOop; 6333 if (_span.contains(addr)) { 6334 // this should be made more efficient 6335 _bitMap->mark(addr); 6336 } 6337 } 6338 } 6339 6340 // A variant of the above, used for CMS marking verification. 
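// Roughly, the verification scheme appears to work as follows: roots are
// re-marked into a separate verification bit map (_verification_bm), and any
// address that ends up marked there but not in the CMS bit map (_cms_bm) is
// reported and treated as fatal, since it would indicate an object that the
// concurrent marking missed (see the do_oop() bodies of this closure and of
// PushAndMarkVerifyClosure further below).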
6341 MarkRefsIntoVerifyClosure::MarkRefsIntoVerifyClosure( 6342 MemRegion span, CMSBitMap* verification_bm, CMSBitMap* cms_bm, 6343 bool should_do_nmethods): 6344 _span(span), 6345 _verification_bm(verification_bm), 6346 _cms_bm(cms_bm), 6347 _should_do_nmethods(should_do_nmethods) { 6348 assert(_ref_processor == NULL, "deliberately left NULL"); 6349 assert(_verification_bm->covers(_span), "_verification_bm/_span mismatch"); 6350 } 6351 6352 void MarkRefsIntoVerifyClosure::do_oop(oop* p) { 6353 // if p points into _span, then mark corresponding bit in _markBitMap 6354 oop this_oop = *p; 6355 if (this_oop != NULL) { 6356 assert(this_oop->is_oop(), "expected an oop"); 6357 HeapWord* addr = (HeapWord*)this_oop; 6358 if (_span.contains(addr)) { 6359 _verification_bm->mark(addr); 6360 if (!_cms_bm->isMarked(addr)) { 6361 oop(addr)->print(); 6362 gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr); 6363 fatal("... aborting"); 6364 } 6365 } 6366 } 6367 } 6368 6369 ////////////////////////////////////////////////// 6370 // MarkRefsIntoAndScanClosure 6371 ////////////////////////////////////////////////// 6372 6373 MarkRefsIntoAndScanClosure::MarkRefsIntoAndScanClosure(MemRegion span, 6374 ReferenceProcessor* rp, 6375 CMSBitMap* bit_map, 6376 CMSBitMap* mod_union_table, 6377 CMSMarkStack* mark_stack, 6378 CMSMarkStack* revisit_stack, 6379 CMSCollector* collector, 6380 bool should_yield, 6381 bool concurrent_precleaning): 6382 _collector(collector), 6383 _span(span), 6384 _bit_map(bit_map), 6385 _mark_stack(mark_stack), 6386 _pushAndMarkClosure(collector, span, rp, bit_map, mod_union_table, 6387 mark_stack, revisit_stack, concurrent_precleaning), 6388 _yield(should_yield), 6389 _concurrent_precleaning(concurrent_precleaning), 6390 _freelistLock(NULL) 6391 { 6392 _ref_processor = rp; 6393 assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL"); 6394 } 6395 6396 // This closure is used to mark refs into the CMS generation at the 6397 // second (final) checkpoint, and to scan and transitively follow 6398 // the unmarked oops. It is also used during the concurrent precleaning 6399 // phase while scanning objects on dirty cards in the CMS generation. 6400 // The marks are made in the marking bit map and the marking stack is 6401 // used for keeping the (newly) grey objects during the scan. 6402 // The parallel version (Par_...) appears further below. 6403 void MarkRefsIntoAndScanClosure::do_oop(oop* p) { 6404 oop this_oop = *p; 6405 if (this_oop != NULL) { 6406 assert(this_oop->is_oop(), "expected an oop"); 6407 HeapWord* addr = (HeapWord*)this_oop; 6408 assert(_mark_stack->isEmpty(), "post-condition (eager drainage)"); 6409 assert(_collector->overflow_list_is_empty(), "should be empty"); 6410 if (_span.contains(addr) && 6411 !_bit_map->isMarked(addr)) { 6412 // mark bit map (object is now grey) 6413 _bit_map->mark(addr); 6414 // push on marking stack (stack should be empty), and drain the 6415 // stack by applying this closure to the oops in the oops popped 6416 // from the stack (i.e. 
blacken the grey objects) 6417 bool res = _mark_stack->push(this_oop); 6418 assert(res, "Should have space to push on empty stack"); 6419 do { 6420 oop new_oop = _mark_stack->pop(); 6421 assert(new_oop != NULL && new_oop->is_oop(), "Expected an oop"); 6422 assert(new_oop->is_parsable(), "Found unparsable oop"); 6423 assert(_bit_map->isMarked((HeapWord*)new_oop), 6424 "only grey objects on this stack"); 6425 // iterate over the oops in this oop, marking and pushing 6426 // the ones in CMS heap (i.e. in _span). 6427 new_oop->oop_iterate(&_pushAndMarkClosure); 6428 // check if it's time to yield 6429 do_yield_check(); 6430 } while (!_mark_stack->isEmpty() || 6431 (!_concurrent_precleaning && take_from_overflow_list())); 6432 // if marking stack is empty, and we are not doing this 6433 // during precleaning, then check the overflow list 6434 } 6435 assert(_mark_stack->isEmpty(), "post-condition (eager drainage)"); 6436 assert(_collector->overflow_list_is_empty(), 6437 "overflow list was drained above"); 6438 // We could restore evacuated mark words, if any, used for 6439 // overflow list links here because the overflow list is 6440 // provably empty here. That would reduce the maximum 6441 // size requirements for preserved_{oop,mark}_stack. 6442 // But we'll just postpone it until we are all done 6443 // so we can just stream through. 6444 if (!_concurrent_precleaning && CMSOverflowEarlyRestoration) { 6445 _collector->restore_preserved_marks_if_any(); 6446 assert(_collector->no_preserved_marks(), "No preserved marks"); 6447 } 6448 assert(!CMSOverflowEarlyRestoration || _collector->no_preserved_marks(), 6449 "All preserved marks should have been restored above"); 6450 } 6451 } 6452 6453 void MarkRefsIntoAndScanClosure::do_yield_work() { 6454 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 6455 "CMS thread should hold CMS token"); 6456 assert_lock_strong(_freelistLock); 6457 assert_lock_strong(_bit_map->lock()); 6458 // relinquish the free_list_lock and bitMaplock() 6459 _bit_map->lock()->unlock(); 6460 _freelistLock->unlock(); 6461 ConcurrentMarkSweepThread::desynchronize(true); 6462 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6463 _collector->stopTimer(); 6464 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr()); 6465 if (PrintCMSStatistics != 0) { 6466 _collector->incrementYields(); 6467 } 6468 _collector->icms_wait(); 6469 6470 // See the comment in coordinator_yield() 6471 for (unsigned i = 0; i < CMSYieldSleepCount && 6472 ConcurrentMarkSweepThread::should_yield() && 6473 !CMSCollector::foregroundGCIsActive(); ++i) { 6474 os::sleep(Thread::current(), 1, false); 6475 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6476 } 6477 6478 ConcurrentMarkSweepThread::synchronize(true); 6479 _freelistLock->lock_without_safepoint_check(); 6480 _bit_map->lock()->lock_without_safepoint_check(); 6481 _collector->startTimer(); 6482 } 6483 6484 /////////////////////////////////////////////////////////// 6485 // Par_MarkRefsIntoAndScanClosure: a parallel version of 6486 // MarkRefsIntoAndScanClosure 6487 /////////////////////////////////////////////////////////// 6488 Par_MarkRefsIntoAndScanClosure::Par_MarkRefsIntoAndScanClosure( 6489 CMSCollector* collector, MemRegion span, ReferenceProcessor* rp, 6490 CMSBitMap* bit_map, OopTaskQueue* work_queue, CMSMarkStack* revisit_stack): 6491 _span(span), 6492 _bit_map(bit_map), 6493 _work_queue(work_queue), 6494 _low_water_mark(MIN2((uint)(work_queue->max_elems()/4), 6495 (uint)(CMSWorkQueueDrainThreshold * 
ParallelGCThreads))), 6496 _par_pushAndMarkClosure(collector, span, rp, bit_map, work_queue, 6497 revisit_stack) 6498 { 6499 _ref_processor = rp; 6500 assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL"); 6501 } 6502 6503 // This closure is used to mark refs into the CMS generation at the 6504 // second (final) checkpoint, and to scan and transitively follow 6505 // the unmarked oops. The marks are made in the marking bit map and 6506 // the work_queue is used for keeping the (newly) grey objects during 6507 // the scan phase whence they are also available for stealing by parallel 6508 // threads. Since the marking bit map is shared, updates are 6509 // synchronized (via CAS). 6510 void Par_MarkRefsIntoAndScanClosure::do_oop(oop* p) { 6511 oop this_oop = *p; 6512 if (this_oop != NULL) { 6513 // Ignore mark word because this could be an already marked oop 6514 // that may be chained at the end of the overflow list. 6515 assert(this_oop->is_oop(true /* ignore mark word */), "expected an oop"); 6516 HeapWord* addr = (HeapWord*)this_oop; 6517 if (_span.contains(addr) && 6518 !_bit_map->isMarked(addr)) { 6519 // mark bit map (object will become grey): 6520 // It is possible for several threads to be 6521 // trying to "claim" this object concurrently; 6522 // the unique thread that succeeds in marking the 6523 // object first will do the subsequent push on 6524 // to the work queue (or overflow list). 6525 if (_bit_map->par_mark(addr)) { 6526 // push on work_queue (which may not be empty), and trim the 6527 // queue to an appropriate length by applying this closure to 6528 // the oops in the oops popped from the stack (i.e. blacken the 6529 // grey objects) 6530 bool res = _work_queue->push(this_oop); 6531 assert(res, "Low water mark should be less than capacity?"); 6532 trim_queue(_low_water_mark); 6533 } // Else, another thread claimed the object 6534 } 6535 } 6536 } 6537 6538 // This closure is used to rescan the marked objects on the dirty cards 6539 // in the mod union table and the card table proper. 6540 size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m( 6541 oop p, MemRegion mr) { 6542 6543 size_t size = 0; 6544 HeapWord* addr = (HeapWord*)p; 6545 DEBUG_ONLY(_collector->verify_work_stacks_empty();) 6546 assert(_span.contains(addr), "we are scanning the CMS generation"); 6547 // check if it's time to yield 6548 if (do_yield_check()) { 6549 // We yielded for some foreground stop-world work, 6550 // and we have been asked to abort this ongoing preclean cycle. 6551 return 0; 6552 } 6553 if (_bitMap->isMarked(addr)) { 6554 // it's marked; is it potentially uninitialized? 6555 if (p->klass() != NULL) { 6556 if (CMSPermGenPrecleaningEnabled && !p->is_parsable()) { 6557 // Signal precleaning to redirty the card since 6558 // the klass pointer is already installed. 6559 assert(size == 0, "Initial value"); 6560 } else { 6561 assert(p->is_parsable(), "must be parsable."); 6562 // an initialized object; ignore mark word in verification below 6563 // since we are running concurrent with mutators 6564 assert(p->is_oop(true), "should be an oop"); 6565 if (p->is_objArray()) { 6566 // objArrays are precisely marked; restrict scanning 6567 // to dirty cards only. 
6568 size = p->oop_iterate(_scanningClosure, mr); 6569 assert(size == CompactibleFreeListSpace::adjustObjectSize(size), 6570 "adjustObjectSize should be the identity for array sizes, " 6571 "which are necessarily larger than minimum object size of " 6572 "two heap words"); 6573 } else { 6574 // A non-array may have been imprecisely marked; we need 6575 // to scan the object in its entirety. 6576 size = CompactibleFreeListSpace::adjustObjectSize( 6577 p->oop_iterate(_scanningClosure)); 6578 } 6579 #ifdef DEBUG 6580 size_t direct_size = 6581 CompactibleFreeListSpace::adjustObjectSize(p->size()); 6582 assert(size == direct_size, "Inconsistency in size"); 6583 assert(size >= 3, "Necessary for Printezis marks to work"); 6584 if (!_bitMap->isMarked(addr+1)) { 6585 _bitMap->verifyNoOneBitsInRange(addr+2, addr+size); 6586 } else { 6587 _bitMap->verifyNoOneBitsInRange(addr+2, addr+size-1); 6588 assert(_bitMap->isMarked(addr+size-1), 6589 "inconsistent Printezis mark"); 6590 } 6591 #endif // DEBUG 6592 } 6593 } else { 6594 // an uninitialized object 6595 assert(_bitMap->isMarked(addr+1), "missing Printezis mark?"); 6596 HeapWord* nextOneAddr = _bitMap->getNextMarkedWordAddress(addr + 2); 6597 size = pointer_delta(nextOneAddr + 1, addr); 6598 assert(size == CompactibleFreeListSpace::adjustObjectSize(size), 6599 "alignment problem"); 6600 // Note that pre-cleaning needn't redirty the card. OopDesc::set_klass() 6601 // will dirty the card when the klass pointer is installed in the 6602 // object (signalling the completion of initialization). 6603 } 6604 } else { 6605 // Either a not yet marked object or an uninitialized object 6606 if (p->klass() == NULL || !p->is_parsable()) { 6607 // An uninitialized object, skip to the next card, since 6608 // we may not be able to read its P-bits yet. 6609 assert(size == 0, "Initial value"); 6610 } else { 6611 // An object not (yet) reached by marking: we merely need to 6612 // compute its size so as to go look at the next block.
6613 assert(p->is_oop(true), "should be an oop"); 6614 size = CompactibleFreeListSpace::adjustObjectSize(p->size()); 6615 } 6616 } 6617 DEBUG_ONLY(_collector->verify_work_stacks_empty();) 6618 return size; 6619 } 6620 6621 void ScanMarkedObjectsAgainCarefullyClosure::do_yield_work() { 6622 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 6623 "CMS thread should hold CMS token"); 6624 assert_lock_strong(_freelistLock); 6625 assert_lock_strong(_bitMap->lock()); 6626 // relinquish the free_list_lock and bitMaplock() 6627 _bitMap->lock()->unlock(); 6628 _freelistLock->unlock(); 6629 ConcurrentMarkSweepThread::desynchronize(true); 6630 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6631 _collector->stopTimer(); 6632 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr()); 6633 if (PrintCMSStatistics != 0) { 6634 _collector->incrementYields(); 6635 } 6636 _collector->icms_wait(); 6637 6638 // See the comment in coordinator_yield() 6639 for (unsigned i = 0; i < CMSYieldSleepCount && 6640 ConcurrentMarkSweepThread::should_yield() && 6641 !CMSCollector::foregroundGCIsActive(); ++i) { 6642 os::sleep(Thread::current(), 1, false); 6643 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6644 } 6645 6646 ConcurrentMarkSweepThread::synchronize(true); 6647 _freelistLock->lock_without_safepoint_check(); 6648 _bitMap->lock()->lock_without_safepoint_check(); 6649 _collector->startTimer(); 6650 } 6651 6652 6653 ////////////////////////////////////////////////////////////////// 6654 // SurvivorSpacePrecleanClosure 6655 ////////////////////////////////////////////////////////////////// 6656 // This (single-threaded) closure is used to preclean the oops in 6657 // the survivor spaces. 6658 size_t SurvivorSpacePrecleanClosure::do_object_careful(oop p) { 6659 6660 HeapWord* addr = (HeapWord*)p; 6661 DEBUG_ONLY(_collector->verify_work_stacks_empty();) 6662 assert(!_span.contains(addr), "we are scanning the survivor spaces"); 6663 assert(p->klass() != NULL, "object should be initialized"); 6664 assert(p->is_parsable(), "must be parsable."); 6665 // an initialized object; ignore mark word in verification below 6666 // since we are running concurrent with mutators 6667 assert(p->is_oop(true), "should be an oop"); 6668 // Note that we do not yield while we iterate over 6669 // the interior oops of p, pushing the relevant ones 6670 // on our marking stack. 6671 size_t size = p->oop_iterate(_scanning_closure); 6672 do_yield_check(); 6673 // Observe that below, we do not abandon the preclean 6674 // phase as soon as we should; rather we empty the 6675 // marking stack before returning. This is to satisfy 6676 // some existing assertions. In general, it may be a 6677 // good idea to abort immediately and complete the marking 6678 // from the grey objects at a later time. 6679 while (!_mark_stack->isEmpty()) { 6680 oop new_oop = _mark_stack->pop(); 6681 assert(new_oop != NULL && new_oop->is_oop(), "Expected an oop"); 6682 assert(new_oop->is_parsable(), "Found unparsable oop"); 6683 assert(_bit_map->isMarked((HeapWord*)new_oop), 6684 "only grey objects on this stack"); 6685 // iterate over the oops in this oop, marking and pushing 6686 // the ones in CMS heap (i.e. in _span). 6687 new_oop->oop_iterate(_scanning_closure); 6688 // check if it's time to yield 6689 do_yield_check(); 6690 } 6691 unsigned int after_count = 6692 GenCollectedHeap::heap()->total_collections(); 6693 bool abort = (_before_count != after_count) || 6694 _collector->should_abort_preclean(); 6695 return abort ?
0 : size; 6696 } 6697 6698 void SurvivorSpacePrecleanClosure::do_yield_work() { 6699 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 6700 "CMS thread should hold CMS token"); 6701 assert_lock_strong(_bit_map->lock()); 6702 // Relinquish the bit map lock 6703 _bit_map->lock()->unlock(); 6704 ConcurrentMarkSweepThread::desynchronize(true); 6705 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6706 _collector->stopTimer(); 6707 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr()); 6708 if (PrintCMSStatistics != 0) { 6709 _collector->incrementYields(); 6710 } 6711 _collector->icms_wait(); 6712 6713 // See the comment in coordinator_yield() 6714 for (unsigned i = 0; i < CMSYieldSleepCount && 6715 ConcurrentMarkSweepThread::should_yield() && 6716 !CMSCollector::foregroundGCIsActive(); ++i) { 6717 os::sleep(Thread::current(), 1, false); 6718 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6719 } 6720 6721 ConcurrentMarkSweepThread::synchronize(true); 6722 _bit_map->lock()->lock_without_safepoint_check(); 6723 _collector->startTimer(); 6724 } 6725 6726 // This closure is used to rescan the marked objects on the dirty cards 6727 // in the mod union table and the card table proper. In the parallel 6728 // case, although the bitMap is shared, we do a single read so the 6729 // isMarked() query is "safe". 6730 bool ScanMarkedObjectsAgainClosure::do_object_bm(oop p, MemRegion mr) { 6731 // Ignore mark word because we are running concurrent with mutators 6732 assert(p->is_oop_or_null(true), "expected an oop or null"); 6733 HeapWord* addr = (HeapWord*)p; 6734 assert(_span.contains(addr), "we are scanning the CMS generation"); 6735 bool is_obj_array = false; 6736 #ifdef DEBUG 6737 if (!_parallel) { 6738 assert(_mark_stack->isEmpty(), "pre-condition (eager drainage)"); 6739 assert(_collector->overflow_list_is_empty(), 6740 "overflow list should be empty"); 6741 6742 } 6743 #endif // DEBUG 6744 if (_bit_map->isMarked(addr)) { 6745 // Obj arrays are precisely marked, non-arrays are not; 6746 // so we scan objArrays precisely and non-arrays in their 6747 // entirety. 
6748 if (p->is_objArray()) { 6749 is_obj_array = true; 6750 if (_parallel) { 6751 p->oop_iterate(_par_scan_closure, mr); 6752 } else { 6753 p->oop_iterate(_scan_closure, mr); 6754 } 6755 } else { 6756 if (_parallel) { 6757 p->oop_iterate(_par_scan_closure); 6758 } else { 6759 p->oop_iterate(_scan_closure); 6760 } 6761 } 6762 } 6763 #ifdef DEBUG 6764 if (!_parallel) { 6765 assert(_mark_stack->isEmpty(), "post-condition (eager drainage)"); 6766 assert(_collector->overflow_list_is_empty(), 6767 "overflow list should be empty"); 6768 6769 } 6770 #endif // DEBUG 6771 return is_obj_array; 6772 } 6773 6774 MarkFromRootsClosure::MarkFromRootsClosure(CMSCollector* collector, 6775 MemRegion span, 6776 CMSBitMap* bitMap, CMSMarkStack* markStack, 6777 CMSMarkStack* revisitStack, 6778 bool should_yield, bool verifying): 6779 _collector(collector), 6780 _span(span), 6781 _bitMap(bitMap), 6782 _mut(&collector->_modUnionTable), 6783 _markStack(markStack), 6784 _revisitStack(revisitStack), 6785 _yield(should_yield), 6786 _skipBits(0) 6787 { 6788 assert(_markStack->isEmpty(), "stack should be empty"); 6789 _finger = _bitMap->startWord(); 6790 _threshold = _finger; 6791 assert(_collector->_restart_addr == NULL, "Sanity check"); 6792 assert(_span.contains(_finger), "Out of bounds _finger?"); 6793 DEBUG_ONLY(_verifying = verifying;) 6794 } 6795 6796 void MarkFromRootsClosure::reset(HeapWord* addr) { 6797 assert(_markStack->isEmpty(), "would cause duplicates on stack"); 6798 assert(_span.contains(addr), "Out of bounds _finger?"); 6799 _finger = addr; 6800 _threshold = (HeapWord*)round_to( 6801 (intptr_t)_finger, CardTableModRefBS::card_size); 6802 } 6803 6804 // Should revisit to see if this should be restructured for 6805 // greater efficiency. 6806 void MarkFromRootsClosure::do_bit(size_t offset) { 6807 if (_skipBits > 0) { 6808 _skipBits--; 6809 return; 6810 } 6811 // convert offset into a HeapWord* 6812 HeapWord* addr = _bitMap->startWord() + offset; 6813 assert(_bitMap->endWord() && addr < _bitMap->endWord(), 6814 "address out of range"); 6815 assert(_bitMap->isMarked(addr), "tautology"); 6816 if (_bitMap->isMarked(addr+1)) { 6817 // this is an allocated but not yet initialized object 6818 assert(_skipBits == 0, "tautology"); 6819 _skipBits = 2; // skip next two marked bits ("Printezis-marks") 6820 oop p = oop(addr); 6821 if (p->klass() == NULL || !p->is_parsable()) { 6822 DEBUG_ONLY(if (!_verifying) {) 6823 // We re-dirty the cards on which this object lies and increase 6824 // the _threshold so that we'll come back to scan this object 6825 // during the preclean or remark phase. (CMSCleanOnEnter) 6826 if (CMSCleanOnEnter) { 6827 size_t sz = _collector->block_size_using_printezis_bits(addr); 6828 HeapWord* end_card_addr = (HeapWord*)round_to( 6829 (intptr_t)(addr+sz), CardTableModRefBS::card_size); 6830 MemRegion redirty_range = MemRegion(addr, end_card_addr); 6831 assert(!redirty_range.is_empty(), "Arithmetical tautology"); 6832 // Bump _threshold to end_card_addr; note that 6833 // _threshold cannot possibly exceed end_card_addr, anyhow. 6834 // This prevents future clearing of the card as the scan proceeds 6835 // to the right. 6836 assert(_threshold <= end_card_addr, 6837 "Because we are just scanning into this object"); 6838 if (_threshold < end_card_addr) { 6839 _threshold = end_card_addr; 6840 } 6841 if (p->klass() != NULL) { 6842 // Redirty the range of cards... 6843 _mut->mark_range(redirty_range); 6844 } // ...else the setting of klass will dirty the card anyway. 
6845 } 6846 DEBUG_ONLY(}) 6847 return; 6848 } 6849 } 6850 scanOopsInOop(addr); 6851 } 6852 6853 // We take a break if we've been at this for a while, 6854 // so as to avoid monopolizing the locks involved. 6855 void MarkFromRootsClosure::do_yield_work() { 6856 // First give up the locks, then yield, then re-lock 6857 // We should probably use a constructor/destructor idiom to 6858 // do this unlock/lock or modify the MutexUnlocker class to 6859 // serve our purpose. XXX 6860 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 6861 "CMS thread should hold CMS token"); 6862 assert_lock_strong(_bitMap->lock()); 6863 _bitMap->lock()->unlock(); 6864 ConcurrentMarkSweepThread::desynchronize(true); 6865 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6866 _collector->stopTimer(); 6867 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr()); 6868 if (PrintCMSStatistics != 0) { 6869 _collector->incrementYields(); 6870 } 6871 _collector->icms_wait(); 6872 6873 // See the comment in coordinator_yield() 6874 for (unsigned i = 0; i < CMSYieldSleepCount && 6875 ConcurrentMarkSweepThread::should_yield() && 6876 !CMSCollector::foregroundGCIsActive(); ++i) { 6877 os::sleep(Thread::current(), 1, false); 6878 ConcurrentMarkSweepThread::acknowledge_yield_request(); 6879 } 6880 6881 ConcurrentMarkSweepThread::synchronize(true); 6882 _bitMap->lock()->lock_without_safepoint_check(); 6883 _collector->startTimer(); 6884 } 6885 6886 void MarkFromRootsClosure::scanOopsInOop(HeapWord* ptr) { 6887 assert(_bitMap->isMarked(ptr), "expected bit to be set"); 6888 assert(_markStack->isEmpty(), 6889 "should drain stack to limit stack usage"); 6890 // convert ptr to an oop preparatory to scanning 6891 oop this_oop = oop(ptr); 6892 // Ignore mark word in verification below, since we 6893 // may be running concurrent with mutators. 6894 assert(this_oop->is_oop(true), "should be an oop"); 6895 assert(_finger <= ptr, "_finger runneth ahead"); 6896 // advance the finger to right end of this object 6897 _finger = ptr + this_oop->size(); 6898 assert(_finger > ptr, "we just incremented it above"); 6899 // On large heaps, it may take us some time to get through 6900 // the marking phase (especially if running iCMS). During 6901 // this time it's possible that a lot of mutations have 6902 // accumulated in the card table and the mod union table -- 6903 // these mutation records are redundant until we have 6904 // actually traced into the corresponding card. 6905 // Here, we check whether advancing the finger would make 6906 // us cross into a new card, and if so clear corresponding 6907 // cards in the MUT (preclean them in the card-table in the 6908 // future). 6909 6910 DEBUG_ONLY(if (!_verifying) {) 6911 // The clean-on-enter optimization is disabled by default, 6912 // until we fix 6178663. 6913 if (CMSCleanOnEnter && (_finger > _threshold)) { 6914 // [_threshold, _finger) represents the interval 6915 // of cards to be cleared in MUT (or precleaned in card table). 6916 // The set of cards to be cleared is all those that overlap 6917 // with the interval [_threshold, _finger); note that 6918 // _threshold is always kept card-aligned but _finger isn't 6919 // always card-aligned. 
6920 HeapWord* old_threshold = _threshold; 6921 assert(old_threshold == (HeapWord*)round_to( 6922 (intptr_t)old_threshold, CardTableModRefBS::card_size), 6923 "_threshold should always be card-aligned"); 6924 _threshold = (HeapWord*)round_to( 6925 (intptr_t)_finger, CardTableModRefBS::card_size); 6926 MemRegion mr(old_threshold, _threshold); 6927 assert(!mr.is_empty(), "Control point invariant"); 6928 assert(_span.contains(mr), "Should clear within span"); 6929 // XXX When _finger crosses from old gen into perm gen 6930 // we may be doing unnecessary cleaning; do better in the 6931 // future by detecting that condition and clearing fewer 6932 // MUT/CT entries. 6933 _mut->clear_range(mr); 6934 } 6935 DEBUG_ONLY(}) 6936 6937 // Note: the finger doesn't advance while we drain 6938 // the stack below. 6939 PushOrMarkClosure pushOrMarkClosure(_collector, 6940 _span, _bitMap, _markStack, 6941 _revisitStack, 6942 _finger, this); 6943 bool res = _markStack->push(this_oop); 6944 assert(res, "Empty non-zero size stack should have space for single push"); 6945 while (!_markStack->isEmpty()) { 6946 oop new_oop = _markStack->pop(); 6947 // Skip verifying header mark word below because we are 6948 // running concurrent with mutators. 6949 assert(new_oop->is_oop(true), "Oops! expected to pop an oop"); 6950 // now scan this oop's oops 6951 new_oop->oop_iterate(&pushOrMarkClosure); 6952 do_yield_check(); 6953 } 6954 assert(_markStack->isEmpty(), "tautology, emphasizing post-condition"); 6955 } 6956 6957 Par_MarkFromRootsClosure::Par_MarkFromRootsClosure(CMSConcMarkingTask* task, 6958 CMSCollector* collector, MemRegion span, 6959 CMSBitMap* bit_map, 6960 OopTaskQueue* work_queue, 6961 CMSMarkStack* overflow_stack, 6962 CMSMarkStack* revisit_stack, 6963 bool should_yield): 6964 _collector(collector), 6965 _whole_span(collector->_span), 6966 _span(span), 6967 _bit_map(bit_map), 6968 _mut(&collector->_modUnionTable), 6969 _work_queue(work_queue), 6970 _overflow_stack(overflow_stack), 6971 _revisit_stack(revisit_stack), 6972 _yield(should_yield), 6973 _skip_bits(0), 6974 _task(task) 6975 { 6976 assert(_work_queue->size() == 0, "work_queue should be empty"); 6977 _finger = span.start(); 6978 _threshold = _finger; // XXX Defer clear-on-enter optimization for now 6979 assert(_span.contains(_finger), "Out of bounds _finger?"); 6980 } 6981 6982 // Should revisit to see if this should be restructured for 6983 // greater efficiency. 6984 void Par_MarkFromRootsClosure::do_bit(size_t offset) { 6985 if (_skip_bits > 0) { 6986 _skip_bits--; 6987 return; 6988 } 6989 // convert offset into a HeapWord* 6990 HeapWord* addr = _bit_map->startWord() + offset; 6991 assert(_bit_map->endWord() && addr < _bit_map->endWord(), 6992 "address out of range"); 6993 assert(_bit_map->isMarked(addr), "tautology"); 6994 if (_bit_map->isMarked(addr+1)) { 6995 // this is an allocated object that might not yet be initialized 6996 assert(_skip_bits == 0, "tautology"); 6997 _skip_bits = 2; // skip next two marked bits ("Printezis-marks") 6998 oop p = oop(addr); 6999 if (p->klass() == NULL || !p->is_parsable()) { 7000 // in the case of Clean-on-Enter optimization, redirty card 7001 // and avoid clearing card by increasing the threshold. 7002 return; 7003 } 7004 } 7005 scan_oops_in_oop(addr); 7006 } 7007 7008 void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) { 7009 assert(_bit_map->isMarked(ptr), "expected bit to be set"); 7010 // Should we assert that our work queue is empty or 7011 // below some drain limit? 
7012 assert(_work_queue->size() == 0, 7013 "should drain stack to limit stack usage"); 7014 // convert ptr to an oop preparatory to scanning 7015 oop this_oop = oop(ptr); 7016 // Ignore mark word in verification below, since we 7017 // may be running concurrent with mutators. 7018 assert(this_oop->is_oop(true), "should be an oop"); 7019 assert(_finger <= ptr, "_finger runneth ahead"); 7020 // advance the finger to right end of this object 7021 _finger = ptr + this_oop->size(); 7022 assert(_finger > ptr, "we just incremented it above"); 7023 // On large heaps, it may take us some time to get through 7024 // the marking phase (especially if running iCMS). During 7025 // this time it's possible that a lot of mutations have 7026 // accumulated in the card table and the mod union table -- 7027 // these mutation records are redundant until we have 7028 // actually traced into the corresponding card. 7029 // Here, we check whether advancing the finger would make 7030 // us cross into a new card, and if so clear corresponding 7031 // cards in the MUT (preclean them in the card-table in the 7032 // future). 7033 7034 // The clean-on-enter optimization is disabled by default, 7035 // until we fix 6178663. 7036 if (CMSCleanOnEnter && (_finger > _threshold)) { 7037 // [_threshold, _finger) represents the interval 7038 // of cards to be cleared in MUT (or precleaned in card table). 7039 // The set of cards to be cleared is all those that overlap 7040 // with the interval [_threshold, _finger); note that 7041 // _threshold is always kept card-aligned but _finger isn't 7042 // always card-aligned. 7043 HeapWord* old_threshold = _threshold; 7044 assert(old_threshold == (HeapWord*)round_to( 7045 (intptr_t)old_threshold, CardTableModRefBS::card_size), 7046 "_threshold should always be card-aligned"); 7047 _threshold = (HeapWord*)round_to( 7048 (intptr_t)_finger, CardTableModRefBS::card_size); 7049 MemRegion mr(old_threshold, _threshold); 7050 assert(!mr.is_empty(), "Control point invariant"); 7051 assert(_span.contains(mr), "Should clear within span"); // _whole_span ?? 7052 // XXX When _finger crosses from old gen into perm gen 7053 // we may be doing unnecessary cleaning; do better in the 7054 // future by detecting that condition and clearing fewer 7055 // MUT/CT entries. 7056 _mut->clear_range(mr); 7057 } 7058 7059 // Note: the local finger doesn't advance while we drain 7060 // the stack below, but the global finger sure can and will. 7061 HeapWord** gfa = _task->global_finger_addr(); 7062 Par_PushOrMarkClosure pushOrMarkClosure(_collector, 7063 _span, _bit_map, 7064 _work_queue, 7065 _overflow_stack, 7066 _revisit_stack, 7067 _finger, 7068 gfa, this); 7069 bool res = _work_queue->push(this_oop); // overflow could occur here 7070 assert(res, "Will hold once we use workqueues"); 7071 while (true) { 7072 oop new_oop; 7073 if (!_work_queue->pop_local(new_oop)) { 7074 // We emptied our work_queue; check if there's stuff that can 7075 // be gotten from the overflow stack. 7076 if (CMSConcMarkingTask::get_work_from_overflow_stack( 7077 _overflow_stack, _work_queue)) { 7078 do_yield_check(); 7079 continue; 7080 } else { // done 7081 break; 7082 } 7083 } 7084 // Skip verifying header mark word below because we are 7085 // running concurrent with mutators. 7086 assert(new_oop->is_oop(true), "Oops! 
expected to pop an oop"); 7087 // now scan this oop's oops 7088 new_oop->oop_iterate(&pushOrMarkClosure); 7089 do_yield_check(); 7090 } 7091 assert(_work_queue->size() == 0, "tautology, emphasizing post-condition"); 7092 } 7093 7094 // Yield in response to a request from VM Thread or 7095 // from mutators. 7096 void Par_MarkFromRootsClosure::do_yield_work() { 7097 assert(_task != NULL, "sanity"); 7098 _task->yield(); 7099 } 7100 7101 // A variant of the above used for verifying CMS marking work. 7102 MarkFromRootsVerifyClosure::MarkFromRootsVerifyClosure(CMSCollector* collector, 7103 MemRegion span, 7104 CMSBitMap* verification_bm, CMSBitMap* cms_bm, 7105 CMSMarkStack* mark_stack): 7106 _collector(collector), 7107 _span(span), 7108 _verification_bm(verification_bm), 7109 _cms_bm(cms_bm), 7110 _mark_stack(mark_stack), 7111 _pam_verify_closure(collector, span, verification_bm, cms_bm, 7112 mark_stack) 7113 { 7114 assert(_mark_stack->isEmpty(), "stack should be empty"); 7115 _finger = _verification_bm->startWord(); 7116 assert(_collector->_restart_addr == NULL, "Sanity check"); 7117 assert(_span.contains(_finger), "Out of bounds _finger?"); 7118 } 7119 7120 void MarkFromRootsVerifyClosure::reset(HeapWord* addr) { 7121 assert(_mark_stack->isEmpty(), "would cause duplicates on stack"); 7122 assert(_span.contains(addr), "Out of bounds _finger?"); 7123 _finger = addr; 7124 } 7125 7126 // Should revisit to see if this should be restructured for 7127 // greater efficiency. 7128 void MarkFromRootsVerifyClosure::do_bit(size_t offset) { 7129 // convert offset into a HeapWord* 7130 HeapWord* addr = _verification_bm->startWord() + offset; 7131 assert(_verification_bm->endWord() && addr < _verification_bm->endWord(), 7132 "address out of range"); 7133 assert(_verification_bm->isMarked(addr), "tautology"); 7134 assert(_cms_bm->isMarked(addr), "tautology"); 7135 7136 assert(_mark_stack->isEmpty(), 7137 "should drain stack to limit stack usage"); 7138 // convert addr to an oop preparatory to scanning 7139 oop this_oop = oop(addr); 7140 assert(this_oop->is_oop(), "should be an oop"); 7141 assert(_finger <= addr, "_finger runneth ahead"); 7142 // advance the finger to right end of this object 7143 _finger = addr + this_oop->size(); 7144 assert(_finger > addr, "we just incremented it above"); 7145 // Note: the finger doesn't advance while we drain 7146 // the stack below. 7147 bool res = _mark_stack->push(this_oop); 7148 assert(res, "Empty non-zero size stack should have space for single push"); 7149 while (!_mark_stack->isEmpty()) { 7150 oop new_oop = _mark_stack->pop(); 7151 assert(new_oop->is_oop(), "Oops! expected to pop an oop"); 7152 // now scan this oop's oops 7153 new_oop->oop_iterate(&_pam_verify_closure); 7154 } 7155 assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition"); 7156 } 7157 7158 PushAndMarkVerifyClosure::PushAndMarkVerifyClosure( 7159 CMSCollector* collector, MemRegion span, 7160 CMSBitMap* verification_bm, CMSBitMap* cms_bm, 7161 CMSMarkStack* mark_stack): 7162 OopClosure(collector->ref_processor()), 7163 _collector(collector), 7164 _span(span), 7165 _verification_bm(verification_bm), 7166 _cms_bm(cms_bm), 7167 _mark_stack(mark_stack) 7168 { } 7169 7170 7171 // Upon stack overflow, we discard (part of) the stack, 7172 // remembering the least address amongst those discarded 7173 // in CMSCollector's _restart_address. 
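// (Discarding grey objects here is presumably safe because marking can be
// restarted from _restart_addr: the reset() entry points above reposition the
// finger, and rescanning the bit map from that point re-derives the work that
// was dropped, at the cost of some repeated scanning.)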
7174 void PushAndMarkVerifyClosure::handle_stack_overflow(HeapWord* lost) { 7175 // Remember the least grey address discarded 7176 HeapWord* ra = (HeapWord*)_mark_stack->least_value(lost); 7177 _collector->lower_restart_addr(ra); 7178 _mark_stack->reset(); // discard stack contents 7179 _mark_stack->expand(); // expand the stack if possible 7180 } 7181 7182 void PushAndMarkVerifyClosure::do_oop(oop* p) { 7183 oop this_oop = *p; 7184 assert(this_oop->is_oop_or_null(), "expected an oop or NULL"); 7185 HeapWord* addr = (HeapWord*)this_oop; 7186 if (_span.contains(addr) && !_verification_bm->isMarked(addr)) { 7187 // Oop lies in _span and isn't yet grey or black 7188 _verification_bm->mark(addr); // now grey 7189 if (!_cms_bm->isMarked(addr)) { 7190 oop(addr)->print(); 7191 gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr); 7192 fatal("... aborting"); 7193 } 7194 7195 if (!_mark_stack->push(this_oop)) { // stack overflow 7196 if (PrintCMSStatistics != 0) { 7197 gclog_or_tty->print_cr("CMS marking stack overflow (benign) at " 7198 SIZE_FORMAT, _mark_stack->capacity()); 7199 } 7200 assert(_mark_stack->isFull(), "Else push should have succeeded"); 7201 handle_stack_overflow(addr); 7202 } 7203 // anything including and to the right of _finger 7204 // will be scanned as we iterate over the remainder of the 7205 // bit map 7206 } 7207 } 7208 7209 PushOrMarkClosure::PushOrMarkClosure(CMSCollector* collector, 7210 MemRegion span, 7211 CMSBitMap* bitMap, CMSMarkStack* markStack, 7212 CMSMarkStack* revisitStack, 7213 HeapWord* finger, MarkFromRootsClosure* parent) : 7214 OopClosure(collector->ref_processor()), 7215 _collector(collector), 7216 _span(span), 7217 _bitMap(bitMap), 7218 _markStack(markStack), 7219 _revisitStack(revisitStack), 7220 _finger(finger), 7221 _parent(parent), 7222 _should_remember_klasses(collector->cms_should_unload_classes()) 7223 { } 7224 7225 Par_PushOrMarkClosure::Par_PushOrMarkClosure(CMSCollector* collector, 7226 MemRegion span, 7227 CMSBitMap* bit_map, 7228 OopTaskQueue* work_queue, 7229 CMSMarkStack* overflow_stack, 7230 CMSMarkStack* revisit_stack, 7231 HeapWord* finger, 7232 HeapWord** global_finger_addr, 7233 Par_MarkFromRootsClosure* parent) : 7234 OopClosure(collector->ref_processor()), 7235 _collector(collector), 7236 _whole_span(collector->_span), 7237 _span(span), 7238 _bit_map(bit_map), 7239 _work_queue(work_queue), 7240 _overflow_stack(overflow_stack), 7241 _revisit_stack(revisit_stack), 7242 _finger(finger), 7243 _global_finger_addr(global_finger_addr), 7244 _parent(parent), 7245 _should_remember_klasses(collector->cms_should_unload_classes()) 7246 { } 7247 7248 // Assumes thread-safe access by callers, who are 7249 // responsible for mutual exclusion. 7250 void CMSCollector::lower_restart_addr(HeapWord* low) { 7251 assert(_span.contains(low), "Out of bounds addr"); 7252 if (_restart_addr == NULL) { 7253 _restart_addr = low; 7254 } else { 7255 _restart_addr = MIN2(_restart_addr, low); 7256 } 7257 } 7258 7259 // Upon stack overflow, we discard (part of) the stack, 7260 // remembering the least address amongst those discarded 7261 // in CMSCollector's _restart_address. 
7262 void PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) { 7263 // Remember the least grey address discarded 7264 HeapWord* ra = (HeapWord*)_markStack->least_value(lost); 7265 _collector->lower_restart_addr(ra); 7266 _markStack->reset(); // discard stack contents 7267 _markStack->expand(); // expand the stack if possible 7268 } 7269 7270 // Upon stack overflow, we discard (part of) the stack, 7271 // remembering the least address amongst those discarded 7272 // in CMSCollector's _restart_address. 7273 void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) { 7274 // We need to do this under a mutex to prevent other 7275 // workers from interfering with the work done below. 7276 MutexLockerEx ml(_overflow_stack->par_lock(), 7277 Mutex::_no_safepoint_check_flag); 7278 // Remember the least grey address discarded 7279 HeapWord* ra = (HeapWord*)_overflow_stack->least_value(lost); 7280 _collector->lower_restart_addr(ra); 7281 _overflow_stack->reset(); // discard stack contents 7282 _overflow_stack->expand(); // expand the stack if possible 7283 } 7284 7285 7286 void PushOrMarkClosure::do_oop(oop* p) { 7287 oop thisOop = *p; 7288 // Ignore mark word because we are running concurrent with mutators. 7289 assert(thisOop->is_oop_or_null(true), "expected an oop or NULL"); 7290 HeapWord* addr = (HeapWord*)thisOop; 7291 if (_span.contains(addr) && !_bitMap->isMarked(addr)) { 7292 // Oop lies in _span and isn't yet grey or black 7293 _bitMap->mark(addr); // now grey 7294 if (addr < _finger) { 7295 // the bit map iteration has already either passed, or 7296 // sampled, this bit in the bit map; we'll need to 7297 // use the marking stack to scan this oop's oops. 7298 bool simulate_overflow = false; 7299 NOT_PRODUCT( 7300 if (CMSMarkStackOverflowALot && 7301 _collector->simulate_overflow()) { 7302 // simulate a stack overflow 7303 simulate_overflow = true; 7304 } 7305 ) 7306 if (simulate_overflow || !_markStack->push(thisOop)) { // stack overflow 7307 if (PrintCMSStatistics != 0) { 7308 gclog_or_tty->print_cr("CMS marking stack overflow (benign) at " 7309 SIZE_FORMAT, _markStack->capacity()); 7310 } 7311 assert(simulate_overflow || _markStack->isFull(), "Else push should have succeeded"); 7312 handle_stack_overflow(addr); 7313 } 7314 } 7315 // anything including and to the right of _finger 7316 // will be scanned as we iterate over the remainder of the 7317 // bit map 7318 do_yield_check(); 7319 } 7320 } 7321 7322 void Par_PushOrMarkClosure::do_oop(oop* p) { 7323 oop this_oop = *p; 7324 // Ignore mark word because we are running concurrent with mutators. 7325 assert(this_oop->is_oop_or_null(true), "expected an oop or NULL"); 7326 HeapWord* addr = (HeapWord*)this_oop; 7327 if (_whole_span.contains(addr) && !_bit_map->isMarked(addr)) { 7328 // Oop lies in _span and isn't yet grey or black 7329 // We read the global_finger (volatile read) strictly after marking oop 7330 bool res = _bit_map->par_mark(addr); // now grey 7331 volatile HeapWord** gfa = (volatile HeapWord**)_global_finger_addr; 7332 // Should we push this marked oop on our stack? 
7333 // -- if someone else marked it, nothing to do 7334 // -- if target oop is above global finger nothing to do 7335 // -- if target oop is in chunk and above local finger 7336 // then nothing to do 7337 // -- else push on work queue 7338 if ( !res // someone else marked it, they will deal with it 7339 || (addr >= *gfa) // will be scanned in a later task 7340 || (_span.contains(addr) && addr >= _finger)) { // later in this chunk 7341 return; 7342 } 7343 // the bit map iteration has already either passed, or 7344 // sampled, this bit in the bit map; we'll need to 7345 // use the marking stack to scan this oop's oops. 7346 bool simulate_overflow = false; 7347 NOT_PRODUCT( 7348 if (CMSMarkStackOverflowALot && 7349 _collector->simulate_overflow()) { 7350 // simulate a stack overflow 7351 simulate_overflow = true; 7352 } 7353 ) 7354 if (simulate_overflow || 7355 !(_work_queue->push(this_oop) || _overflow_stack->par_push(this_oop))) { 7356 // stack overflow 7357 if (PrintCMSStatistics != 0) { 7358 gclog_or_tty->print_cr("CMS marking stack overflow (benign) at " 7359 SIZE_FORMAT, _overflow_stack->capacity()); 7360 } 7361 // We cannot assert that the overflow stack is full because 7362 // it may have been emptied since. 7363 assert(simulate_overflow || 7364 _work_queue->size() == _work_queue->max_elems(), 7365 "Else push should have succeeded"); 7366 handle_stack_overflow(addr); 7367 } 7368 do_yield_check(); 7369 } 7370 } 7371 7372 7373 PushAndMarkClosure::PushAndMarkClosure(CMSCollector* collector, 7374 MemRegion span, 7375 ReferenceProcessor* rp, 7376 CMSBitMap* bit_map, 7377 CMSBitMap* mod_union_table, 7378 CMSMarkStack* mark_stack, 7379 CMSMarkStack* revisit_stack, 7380 bool concurrent_precleaning): 7381 OopClosure(rp), 7382 _collector(collector), 7383 _span(span), 7384 _bit_map(bit_map), 7385 _mod_union_table(mod_union_table), 7386 _mark_stack(mark_stack), 7387 _revisit_stack(revisit_stack), 7388 _concurrent_precleaning(concurrent_precleaning), 7389 _should_remember_klasses(collector->cms_should_unload_classes()) 7390 { 7391 assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL"); 7392 } 7393 7394 // Grey object rescan during pre-cleaning and second checkpoint phases -- 7395 // the non-parallel version (the parallel version appears further below.) 7396 void PushAndMarkClosure::do_oop(oop* p) { 7397 oop this_oop = *p; 7398 // Ignore mark word verification. If during concurrent precleaning 7399 // the object monitor may be locked. If during the checkpoint 7400 // phases, the object may already have been reached by a different 7401 // path and may be at the end of the global overflow list (so 7402 // the mark word may be NULL). 7403 assert(this_oop->is_oop_or_null(true/* ignore mark word */), 7404 "expected an oop or NULL"); 7405 HeapWord* addr = (HeapWord*)this_oop; 7406 // Check if oop points into the CMS generation 7407 // and is not marked 7408 if (_span.contains(addr) && !_bit_map->isMarked(addr)) { 7409 // a white object ... 7410 _bit_map->mark(addr); // ... 
now grey
7411 // push on the marking stack (grey set)
7412 bool simulate_overflow = false;
7413 NOT_PRODUCT(
7414 if (CMSMarkStackOverflowALot &&
7415 _collector->simulate_overflow()) {
7416 // simulate a stack overflow
7417 simulate_overflow = true;
7418 }
7419 )
7420 if (simulate_overflow || !_mark_stack->push(this_oop)) {
7421 if (_concurrent_precleaning) {
7422 // During precleaning we can just dirty the appropriate card(s)
7423 // in the mod union table, thus ensuring that the object remains
7424 // in the grey set, and continue. In the case of object arrays
7425 // we need to dirty all of the cards that the object spans,
7426 // since the rescan of object arrays will be limited to the
7427 // dirty cards.
7428 // Note that no one can be interfering with us in this action
7429 // of dirtying the mod union table, so no locking or atomics
7430 // are required.
7431 if (this_oop->is_objArray()) {
7432 size_t sz = this_oop->size();
7433 HeapWord* end_card_addr = (HeapWord*)round_to(
7434 (intptr_t)(addr+sz), CardTableModRefBS::card_size);
7435 MemRegion redirty_range = MemRegion(addr, end_card_addr);
7436 assert(!redirty_range.is_empty(), "Arithmetical tautology");
7437 _mod_union_table->mark_range(redirty_range);
7438 } else {
7439 _mod_union_table->mark(addr);
7440 }
7441 _collector->_ser_pmc_preclean_ovflw++;
7442 } else {
7443 // During the remark phase, we need to remember this oop
7444 // in the overflow list.
7445 _collector->push_on_overflow_list(this_oop);
7446 _collector->_ser_pmc_remark_ovflw++;
7447 }
7448 }
7449 }
7450 }
7451
7452 Par_PushAndMarkClosure::Par_PushAndMarkClosure(CMSCollector* collector,
7453 MemRegion span,
7454 ReferenceProcessor* rp,
7455 CMSBitMap* bit_map,
7456 OopTaskQueue* work_queue,
7457 CMSMarkStack* revisit_stack):
7458 OopClosure(rp),
7459 _collector(collector),
7460 _span(span),
7461 _bit_map(bit_map),
7462 _work_queue(work_queue),
7463 _revisit_stack(revisit_stack),
7464 _should_remember_klasses(collector->cms_should_unload_classes())
7465 {
7466 assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL");
7467 }
7468
7469 // Grey object rescan during second checkpoint phase --
7470 // the parallel version.
7471 void Par_PushAndMarkClosure::do_oop(oop* p) {
7472 oop this_oop = *p;
7473 // In the assert below, we ignore the mark word because
7474 // this oop may point to an already visited object that is
7475 // on the overflow stack (in which case the mark word has
7476 // been hijacked for chaining into the overflow stack --
7477 // if this is the last object in the overflow stack then
7478 // its mark word will be NULL). Because this object may
7479 // have been subsequently popped off the global overflow
7480 // stack, and the mark word possibly restored to the prototypical
7481 // value, by the time we get to examine this failing assert in
7482 // the debugger, is_oop_or_null(false) may subsequently start
7483 // to hold.
7484 assert(this_oop->is_oop_or_null(true),
7485 "expected an oop or NULL");
7486 HeapWord* addr = (HeapWord*)this_oop;
7487 // Check if oop points into the CMS generation
7488 // and is not marked
7489 if (_span.contains(addr) && !_bit_map->isMarked(addr)) {
7490 // a white object ...
7491 // If we manage to "claim" the object, by being the
7492 // first thread to mark it, then we push it on our
7493 // marking stack
7494 if (_bit_map->par_mark(addr)) { // ...
now grey
7495 // push on work queue (grey set)
7496 bool simulate_overflow = false;
7497 NOT_PRODUCT(
7498 if (CMSMarkStackOverflowALot &&
7499 _collector->par_simulate_overflow()) {
7500 // simulate a stack overflow
7501 simulate_overflow = true;
7502 }
7503 )
7504 if (simulate_overflow || !_work_queue->push(this_oop)) {
7505 _collector->par_push_on_overflow_list(this_oop);
7506 _collector->_par_pmc_remark_ovflw++; // imprecise OK: no need to CAS
7507 }
7508 } // Else, some other thread got there first
7509 }
7510 }
7511
7512 void PushAndMarkClosure::remember_klass(Klass* k) {
7513 if (!_revisit_stack->push(oop(k))) {
7514 fatal("Revisit stack overflowed in PushAndMarkClosure");
7515 }
7516 }
7517
7518 void Par_PushAndMarkClosure::remember_klass(Klass* k) {
7519 if (!_revisit_stack->par_push(oop(k))) {
7520 fatal("Revisit stack overflowed in Par_PushAndMarkClosure");
7521 }
7522 }
7523
7524 void CMSPrecleanRefsYieldClosure::do_yield_work() {
7525 Mutex* bml = _collector->bitMapLock();
7526 assert_lock_strong(bml);
7527 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
7528 "CMS thread should hold CMS token");
7529
7530 bml->unlock();
7531 ConcurrentMarkSweepThread::desynchronize(true);
7532
7533 ConcurrentMarkSweepThread::acknowledge_yield_request();
7534
7535 _collector->stopTimer();
7536 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr());
7537 if (PrintCMSStatistics != 0) {
7538 _collector->incrementYields();
7539 }
7540 _collector->icms_wait();
7541
7542 // See the comment in coordinator_yield()
7543 for (unsigned i = 0; i < CMSYieldSleepCount &&
7544 ConcurrentMarkSweepThread::should_yield() &&
7545 !CMSCollector::foregroundGCIsActive(); ++i) {
7546 os::sleep(Thread::current(), 1, false);
7547 ConcurrentMarkSweepThread::acknowledge_yield_request();
7548 }
7549
7550 ConcurrentMarkSweepThread::synchronize(true);
7551 bml->lock();
7552
7553 _collector->startTimer();
7554 }
7555
7556 bool CMSPrecleanRefsYieldClosure::should_return() {
7557 if (ConcurrentMarkSweepThread::should_yield()) {
7558 do_yield_work();
7559 }
7560 return _collector->foregroundGCIsActive();
7561 }
7562
7563 void MarkFromDirtyCardsClosure::do_MemRegion(MemRegion mr) {
7564 assert(((size_t)mr.start())%CardTableModRefBS::card_size_in_words == 0,
7565 "mr should be aligned to start at a card boundary");
7566 // We'd like to assert:
7567 // assert(mr.word_size()%CardTableModRefBS::card_size_in_words == 0,
7568 // "mr should be a range of cards");
7569 // However, that would be too strong in one case -- the last
7570 // partition ends at _unallocated_block which, in general, can be
7571 // an arbitrary boundary, not necessarily card aligned.
7572 if (PrintCMSStatistics != 0) { 7573 _num_dirty_cards += 7574 mr.word_size()/CardTableModRefBS::card_size_in_words; 7575 } 7576 _space->object_iterate_mem(mr, &_scan_cl); 7577 } 7578 7579 SweepClosure::SweepClosure(CMSCollector* collector, 7580 ConcurrentMarkSweepGeneration* g, 7581 CMSBitMap* bitMap, bool should_yield) : 7582 _collector(collector), 7583 _g(g), 7584 _sp(g->cmsSpace()), 7585 _limit(_sp->sweep_limit()), 7586 _freelistLock(_sp->freelistLock()), 7587 _bitMap(bitMap), 7588 _yield(should_yield), 7589 _inFreeRange(false), // No free range at beginning of sweep 7590 _freeRangeInFreeLists(false), // No free range at beginning of sweep 7591 _lastFreeRangeCoalesced(false), 7592 _freeFinger(g->used_region().start()) 7593 { 7594 NOT_PRODUCT( 7595 _numObjectsFreed = 0; 7596 _numWordsFreed = 0; 7597 _numObjectsLive = 0; 7598 _numWordsLive = 0; 7599 _numObjectsAlreadyFree = 0; 7600 _numWordsAlreadyFree = 0; 7601 _last_fc = NULL; 7602 7603 _sp->initializeIndexedFreeListArrayReturnedBytes(); 7604 _sp->dictionary()->initializeDictReturnedBytes(); 7605 ) 7606 assert(_limit >= _sp->bottom() && _limit <= _sp->end(), 7607 "sweep _limit out of bounds"); 7608 if (CMSTraceSweeper) { 7609 gclog_or_tty->print("\n====================\nStarting new sweep\n"); 7610 } 7611 } 7612 7613 // We need this destructor to reclaim any space at the end 7614 // of the space, which do_blk below may not have added back to 7615 // the free lists. [basically dealing with the "fringe effect"] 7616 SweepClosure::~SweepClosure() { 7617 assert_lock_strong(_freelistLock); 7618 // this should be treated as the end of a free run if any 7619 // The current free range should be returned to the free lists 7620 // as one coalesced chunk. 7621 if (inFreeRange()) { 7622 flushCurFreeChunk(freeFinger(), 7623 pointer_delta(_limit, freeFinger())); 7624 assert(freeFinger() < _limit, "the finger pointeth off base"); 7625 if (CMSTraceSweeper) { 7626 gclog_or_tty->print("destructor:"); 7627 gclog_or_tty->print("Sweep:put_free_blk 0x%x ("SIZE_FORMAT") " 7628 "[coalesced:"SIZE_FORMAT"]\n", 7629 freeFinger(), pointer_delta(_limit, freeFinger()), 7630 lastFreeRangeCoalesced()); 7631 } 7632 } 7633 NOT_PRODUCT( 7634 if (Verbose && PrintGC) { 7635 gclog_or_tty->print("Collected "SIZE_FORMAT" objects, " 7636 SIZE_FORMAT " bytes", 7637 _numObjectsFreed, _numWordsFreed*sizeof(HeapWord)); 7638 gclog_or_tty->print_cr("\nLive "SIZE_FORMAT" objects, " 7639 SIZE_FORMAT" bytes " 7640 "Already free "SIZE_FORMAT" objects, "SIZE_FORMAT" bytes", 7641 _numObjectsLive, _numWordsLive*sizeof(HeapWord), 7642 _numObjectsAlreadyFree, _numWordsAlreadyFree*sizeof(HeapWord)); 7643 size_t totalBytes = (_numWordsFreed + _numWordsLive + _numWordsAlreadyFree) * 7644 sizeof(HeapWord); 7645 gclog_or_tty->print_cr("Total sweep: "SIZE_FORMAT" bytes", totalBytes); 7646 7647 if (PrintCMSStatistics && CMSVerifyReturnedBytes) { 7648 size_t indexListReturnedBytes = _sp->sumIndexedFreeListArrayReturnedBytes(); 7649 size_t dictReturnedBytes = _sp->dictionary()->sumDictReturnedBytes(); 7650 size_t returnedBytes = indexListReturnedBytes + dictReturnedBytes; 7651 gclog_or_tty->print("Returned "SIZE_FORMAT" bytes", returnedBytes); 7652 gclog_or_tty->print(" Indexed List Returned "SIZE_FORMAT" bytes", 7653 indexListReturnedBytes); 7654 gclog_or_tty->print_cr(" Dictionary Returned "SIZE_FORMAT" bytes", 7655 dictReturnedBytes); 7656 } 7657 } 7658 ) 7659 // Now, in debug mode, just null out the sweep_limit 7660 NOT_PRODUCT(_sp->clear_sweep_limit();) 7661 if (CMSTraceSweeper) { 7662 
gclog_or_tty->print("end of sweep\n================\n");
7663 }
7664 }
7665
7666 void SweepClosure::initialize_free_range(HeapWord* freeFinger,
7667 bool freeRangeInFreeLists) {
7668 if (CMSTraceSweeper) {
7669 gclog_or_tty->print("---- Start free range 0x%x with free block [%d] (%d)\n",
7670 freeFinger, _sp->block_size(freeFinger),
7671 freeRangeInFreeLists);
7672 }
7673 assert(!inFreeRange(), "Trampling existing free range");
7674 set_inFreeRange(true);
7675 set_lastFreeRangeCoalesced(false);
7676
7677 set_freeFinger(freeFinger);
7678 set_freeRangeInFreeLists(freeRangeInFreeLists);
7679 if (CMSTestInFreeList) {
7680 if (freeRangeInFreeLists) {
7681 FreeChunk* fc = (FreeChunk*) freeFinger;
7682 assert(fc->isFree(), "A chunk on the free list should be free.");
7683 assert(fc->size() > 0, "Free range should have a size");
7684 assert(_sp->verifyChunkInFreeLists(fc), "Chunk is not in free lists");
7685 }
7686 }
7687 }
7688
7689 // Note that the sweeper runs concurrently with mutators. Thus,
7690 // it is possible for direct allocation in this generation to happen
7691 // in the middle of the sweep. Note that the sweeper also coalesces
7692 // contiguous free blocks. Thus, unless the sweeper and the allocator
7693 // synchronize appropriately, freshly allocated blocks may get swept up.
7694 // This is accomplished by the sweeper locking the free lists while
7695 // it is sweeping. Thus blocks that are determined to be free are
7696 // indeed free. There is, however, one additional complication:
7697 // blocks that have been allocated since the final checkpoint and
7698 // mark will not have been marked and so would be treated as
7699 // unreachable and swept up. To prevent this, the allocator marks
7700 // the bit map when allocating during the sweep phase. This leads,
7701 // however, to a further complication -- objects may have been allocated
7702 // but not yet initialized -- in the sense that the header isn't yet
7703 // installed. The sweeper cannot then determine the size of the block
7704 // in order to skip over it. To deal with this case, we use a technique
7705 // (due to Printezis) to encode such uninitialized block sizes in the
7706 // bit map. Since the bit map uses a bit per every HeapWord, but the
7707 // CMS generation has a minimum object size of 3 HeapWords, it follows
7708 // that "normal marks" won't be adjacent in the bit map (there will
7709 // always be at least two 0 bits between successive 1 bits). We make use
7710 // of these "unused" bits to represent uninitialized blocks -- the bit
7711 // corresponding to the start of the uninitialized object and the next
7712 // bit are both set. Finally, a 1 bit marks the end of the object that
7713 // started with the two consecutive 1 bits to indicate its potentially
7714 // uninitialized state.
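// What the encoding described above looks like in terms of the bit map
// operations used elsewhere in this file. The helper below is purely an
// illustrative sketch -- it is hypothetical, not called from anywhere, and
// elides the locking preconditions that the real code observes; the
// sweeper's actual decoding of Printezis marks is in
// SweepClosure::doLiveChunk() further below.
#ifndef PRODUCT
static size_t printezis_mark_and_decode_size(CMSBitMap* bm, HeapWord* start,
                                             size_t size_in_words) {
  assert(size_in_words >= 3, "Encoding relies on a minimum object size of 3 HeapWords");
  // Writer (the allocator, during a sweep): two consecutive set bits say
  // "this block's size cannot yet be read from its header" ...
  bm->mark(start);
  bm->mark(start + 1);
  // ... and a third set bit records the last word of the block.
  bm->mark(start + size_in_words - 1);
  // Reader (the sweeper): recover the size without touching the header.
  assert(bm->isMarked(start) && bm->isMarked(start + 1), "Not Printezis-marked");
  HeapWord* last = bm->getNextMarkedWordAddress(start + 2);
  return pointer_delta(last + 1, start);
}
#endif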
7715
7716 size_t SweepClosure::do_blk_careful(HeapWord* addr) {
7717 FreeChunk* fc = (FreeChunk*)addr;
7718 size_t res;
7719
7720 // check if we are done sweeping
7721 if (addr == _limit) { // we have swept up to the limit, do nothing more
7722 assert(_limit >= _sp->bottom() && _limit <= _sp->end(),
7723 "sweep _limit out of bounds");
7724 // help the closure application finish
7725 return pointer_delta(_sp->end(), _limit);
7726 }
7727 assert(addr <= _limit, "sweep invariant");
7728
7729 // check if we should yield
7730 do_yield_check(addr);
7731 if (fc->isFree()) {
7732 // Chunk that is already free
7733 res = fc->size();
7734 doAlreadyFreeChunk(fc);
7735 debug_only(_sp->verifyFreeLists());
7736 assert(res == fc->size(), "Don't expect the size to change");
7737 NOT_PRODUCT(
7738 _numObjectsAlreadyFree++;
7739 _numWordsAlreadyFree += res;
7740 )
7741 NOT_PRODUCT(_last_fc = fc;)
7742 } else if (!_bitMap->isMarked(addr)) {
7743 // Chunk is fresh garbage
7744 res = doGarbageChunk(fc);
7745 debug_only(_sp->verifyFreeLists());
7746 NOT_PRODUCT(
7747 _numObjectsFreed++;
7748 _numWordsFreed += res;
7749 )
7750 } else {
7751 // Chunk that is alive.
7752 res = doLiveChunk(fc);
7753 debug_only(_sp->verifyFreeLists());
7754 NOT_PRODUCT(
7755 _numObjectsLive++;
7756 _numWordsLive += res;
7757 )
7758 }
7759 return res;
7760 }
7761
7762 // For the smart allocation, record the following:
7763 // split deaths - a free chunk is removed from its free list because
7764 // it is being split into two or more chunks.
7765 // split birth - a free chunk is being added to its free list because
7766 // a larger free chunk has been split and resulted in this free chunk.
7767 // coal death - a free chunk is being removed from its free list because
7768 // it is being coalesced into a large free chunk.
7769 // coal birth - a free chunk is being added to its free list because
7770 // it was created when two or more free chunks were coalesced into
7771 // this free chunk.
7772 //
7773 // These statistics are used to determine the desired number of free
7774 // chunks of a given size. The desired number is chosen to be relative
7775 // to the end of a CMS sweep. The desired number at the end of a sweep
7776 // is the
7777 // count-at-end-of-previous-sweep (an amount that was enough)
7778 // - count-at-beginning-of-current-sweep (the excess)
7779 // + split-births (gains in this size during interval)
7780 // - split-deaths (demands on this size during interval)
7781 // where the interval is from the end of one sweep to the end of the
7782 // next.
7783 //
7784 // When sweeping, the sweeper maintains an accumulated chunk which is
7785 // the chunk that is made up of chunks that have been coalesced. That
7786 // will be termed the left-hand chunk. A new chunk of garbage that
7787 // is being considered for coalescing will be referred to as the
7788 // right-hand chunk.
7789 //
7790 // When making a decision on whether to coalesce a right-hand chunk with
7791 // the current left-hand chunk, the current count vs. the desired count
7792 // of the left-hand chunk is considered. Also if the right-hand chunk
7793 // is near the large chunk at the end of the heap (see
7794 // ConcurrentMarkSweepGeneration::isNearLargestChunk()), then the
7795 // left-hand chunk is coalesced.
7796 //
7797 // When making a decision about whether to split a chunk, the desired count
7798 // vs. the current count of the candidate to be split is also considered.
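// As a worked example (numbers purely illustrative): a size that ended the
// previous sweep with 40 chunks, began the current sweep with 30, and saw
// 25 split-births and 10 split-deaths during the interval has a desired
// count of 40 - 30 + 25 - 10 = 25. A free list currently holding fewer
// chunks than that is underpopulated; one holding more is overpopulated.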
7799 // If the candidate is underpopulated (currently fewer chunks than desired) 7800 // a chunk of an overpopulated (currently more chunks than desired) size may 7801 // be chosen. The "hint" associated with a free list, if non-null, points 7802 // to a free list which may be overpopulated. 7803 // 7804 7805 void SweepClosure::doAlreadyFreeChunk(FreeChunk* fc) { 7806 size_t size = fc->size(); 7807 // Chunks that cannot be coalesced are not in the 7808 // free lists. 7809 if (CMSTestInFreeList && !fc->cantCoalesce()) { 7810 assert(_sp->verifyChunkInFreeLists(fc), 7811 "free chunk should be in free lists"); 7812 } 7813 // a chunk that is already free, should not have been 7814 // marked in the bit map 7815 HeapWord* addr = (HeapWord*) fc; 7816 assert(!_bitMap->isMarked(addr), "free chunk should be unmarked"); 7817 // Verify that the bit map has no bits marked between 7818 // addr and purported end of this block. 7819 _bitMap->verifyNoOneBitsInRange(addr + 1, addr + size); 7820 7821 // Some chunks cannot be coalesced in under any circumstances. 7822 // See the definition of cantCoalesce(). 7823 if (!fc->cantCoalesce()) { 7824 // This chunk can potentially be coalesced. 7825 if (_sp->adaptive_freelists()) { 7826 // All the work is done in 7827 doPostIsFreeOrGarbageChunk(fc, size); 7828 } else { // Not adaptive free lists 7829 // this is a free chunk that can potentially be coalesced by the sweeper; 7830 if (!inFreeRange()) { 7831 // if the next chunk is a free block that can't be coalesced 7832 // it doesn't make sense to remove this chunk from the free lists 7833 FreeChunk* nextChunk = (FreeChunk*)(addr + size); 7834 assert((HeapWord*)nextChunk <= _limit, "sweep invariant"); 7835 if ((HeapWord*)nextChunk < _limit && // there's a next chunk... 7836 nextChunk->isFree() && // which is free... 7837 nextChunk->cantCoalesce()) { // ... but cant be coalesced 7838 // nothing to do 7839 } else { 7840 // Potentially the start of a new free range: 7841 // Don't eagerly remove it from the free lists. 7842 // No need to remove it if it will just be put 7843 // back again. (Also from a pragmatic point of view 7844 // if it is a free block in a region that is beyond 7845 // any allocated blocks, an assertion will fail) 7846 // Remember the start of a free run. 7847 initialize_free_range(addr, true); 7848 // end - can coalesce with next chunk 7849 } 7850 } else { 7851 // the midst of a free range, we are coalescing 7852 debug_only(record_free_block_coalesced(fc);) 7853 if (CMSTraceSweeper) { 7854 gclog_or_tty->print(" -- pick up free block 0x%x (%d)\n", fc, size); 7855 } 7856 // remove it from the free lists 7857 _sp->removeFreeChunkFromFreeLists(fc); 7858 set_lastFreeRangeCoalesced(true); 7859 // If the chunk is being coalesced and the current free range is 7860 // in the free lists, remove the current free range so that it 7861 // will be returned to the free lists in its entirety - all 7862 // the coalesced pieces included. 7863 if (freeRangeInFreeLists()) { 7864 FreeChunk* ffc = (FreeChunk*) freeFinger(); 7865 assert(ffc->size() == pointer_delta(addr, freeFinger()), 7866 "Size of free range is inconsistent with chunk size."); 7867 if (CMSTestInFreeList) { 7868 assert(_sp->verifyChunkInFreeLists(ffc), 7869 "free range is not in free lists"); 7870 } 7871 _sp->removeFreeChunkFromFreeLists(ffc); 7872 set_freeRangeInFreeLists(false); 7873 } 7874 } 7875 } 7876 } else { 7877 // Code path common to both original and adaptive free lists. 
7878 7879 // cant coalesce with previous block; this should be treated 7880 // as the end of a free run if any 7881 if (inFreeRange()) { 7882 // we kicked some butt; time to pick up the garbage 7883 assert(freeFinger() < addr, "the finger pointeth off base"); 7884 flushCurFreeChunk(freeFinger(), pointer_delta(addr, freeFinger())); 7885 } 7886 // else, nothing to do, just continue 7887 } 7888 } 7889 7890 size_t SweepClosure::doGarbageChunk(FreeChunk* fc) { 7891 // This is a chunk of garbage. It is not in any free list. 7892 // Add it to a free list or let it possibly be coalesced into 7893 // a larger chunk. 7894 HeapWord* addr = (HeapWord*) fc; 7895 size_t size = CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size()); 7896 7897 if (_sp->adaptive_freelists()) { 7898 // Verify that the bit map has no bits marked between 7899 // addr and purported end of just dead object. 7900 _bitMap->verifyNoOneBitsInRange(addr + 1, addr + size); 7901 7902 doPostIsFreeOrGarbageChunk(fc, size); 7903 } else { 7904 if (!inFreeRange()) { 7905 // start of a new free range 7906 assert(size > 0, "A free range should have a size"); 7907 initialize_free_range(addr, false); 7908 7909 } else { 7910 // this will be swept up when we hit the end of the 7911 // free range 7912 if (CMSTraceSweeper) { 7913 gclog_or_tty->print(" -- pick up garbage 0x%x (%d) \n", fc, size); 7914 } 7915 // If the chunk is being coalesced and the current free range is 7916 // in the free lists, remove the current free range so that it 7917 // will be returned to the free lists in its entirety - all 7918 // the coalesced pieces included. 7919 if (freeRangeInFreeLists()) { 7920 FreeChunk* ffc = (FreeChunk*)freeFinger(); 7921 assert(ffc->size() == pointer_delta(addr, freeFinger()), 7922 "Size of free range is inconsistent with chunk size."); 7923 if (CMSTestInFreeList) { 7924 assert(_sp->verifyChunkInFreeLists(ffc), 7925 "free range is not in free lists"); 7926 } 7927 _sp->removeFreeChunkFromFreeLists(ffc); 7928 set_freeRangeInFreeLists(false); 7929 } 7930 set_lastFreeRangeCoalesced(true); 7931 } 7932 // this will be swept up when we hit the end of the free range 7933 7934 // Verify that the bit map has no bits marked between 7935 // addr and purported end of just dead object. 7936 _bitMap->verifyNoOneBitsInRange(addr + 1, addr + size); 7937 } 7938 return size; 7939 } 7940 7941 size_t SweepClosure::doLiveChunk(FreeChunk* fc) { 7942 HeapWord* addr = (HeapWord*) fc; 7943 // The sweeper has just found a live object. Return any accumulated 7944 // left hand chunk to the free lists. 7945 if (inFreeRange()) { 7946 if (_sp->adaptive_freelists()) { 7947 flushCurFreeChunk(freeFinger(), 7948 pointer_delta(addr, freeFinger())); 7949 } else { // not adaptive freelists 7950 set_inFreeRange(false); 7951 // Add the free range back to the free list if it is not already 7952 // there. 7953 if (!freeRangeInFreeLists()) { 7954 assert(freeFinger() < addr, "the finger pointeth off base"); 7955 if (CMSTraceSweeper) { 7956 gclog_or_tty->print("Sweep:put_free_blk 0x%x (%d) " 7957 "[coalesced:%d]\n", 7958 freeFinger(), pointer_delta(addr, freeFinger()), 7959 lastFreeRangeCoalesced()); 7960 } 7961 _sp->addChunkAndRepairOffsetTable(freeFinger(), 7962 pointer_delta(addr, freeFinger()), lastFreeRangeCoalesced()); 7963 } 7964 } 7965 } 7966 7967 // Common code path for original and adaptive free lists. 
7968 7969 // this object is live: we'd normally expect this to be 7970 // an oop, and like to assert the following: 7971 // assert(oop(addr)->is_oop(), "live block should be an oop"); 7972 // However, as we commented above, this may be an object whose 7973 // header hasn't yet been initialized. 7974 size_t size; 7975 assert(_bitMap->isMarked(addr), "Tautology for this control point"); 7976 if (_bitMap->isMarked(addr + 1)) { 7977 // Determine the size from the bit map, rather than trying to 7978 // compute it from the object header. 7979 HeapWord* nextOneAddr = _bitMap->getNextMarkedWordAddress(addr + 2); 7980 size = pointer_delta(nextOneAddr + 1, addr); 7981 assert(size == CompactibleFreeListSpace::adjustObjectSize(size), 7982 "alignment problem"); 7983 7984 #ifdef DEBUG 7985 if (oop(addr)->klass() != NULL && 7986 ( !_collector->cms_should_unload_classes() 7987 || oop(addr)->is_parsable())) { 7988 // Ignore mark word because we are running concurrent with mutators 7989 assert(oop(addr)->is_oop(true), "live block should be an oop"); 7990 assert(size == 7991 CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size()), 7992 "P-mark and computed size do not agree"); 7993 } 7994 #endif 7995 7996 } else { 7997 // This should be an initialized object that's alive. 7998 assert(oop(addr)->klass() != NULL && 7999 (!_collector->cms_should_unload_classes() 8000 || oop(addr)->is_parsable()), 8001 "Should be an initialized object"); 8002 // Ignore mark word because we are running concurrent with mutators 8003 assert(oop(addr)->is_oop(true), "live block should be an oop"); 8004 // Verify that the bit map has no bits marked between 8005 // addr and purported end of this block. 8006 size = CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size()); 8007 assert(size >= 3, "Necessary for Printezis marks to work"); 8008 assert(!_bitMap->isMarked(addr+1), "Tautology for this control point"); 8009 DEBUG_ONLY(_bitMap->verifyNoOneBitsInRange(addr+2, addr+size);) 8010 } 8011 return size; 8012 } 8013 8014 void SweepClosure::doPostIsFreeOrGarbageChunk(FreeChunk* fc, 8015 size_t chunkSize) { 8016 // doPostIsFreeOrGarbageChunk() should only be called in the smart allocation 8017 // scheme. 
8018 bool fcInFreeLists = fc->isFree(); 8019 assert(_sp->adaptive_freelists(), "Should only be used in this case."); 8020 assert((HeapWord*)fc <= _limit, "sweep invariant"); 8021 if (CMSTestInFreeList && fcInFreeLists) { 8022 assert(_sp->verifyChunkInFreeLists(fc), 8023 "free chunk is not in free lists"); 8024 } 8025 8026 8027 if (CMSTraceSweeper) { 8028 gclog_or_tty->print_cr(" -- pick up another chunk at 0x%x (%d)", fc, chunkSize); 8029 } 8030 8031 HeapWord* addr = (HeapWord*) fc; 8032 8033 bool coalesce; 8034 size_t left = pointer_delta(addr, freeFinger()); 8035 size_t right = chunkSize; 8036 switch (FLSCoalescePolicy) { 8037 // numeric value forms a coalition aggressiveness metric 8038 case 0: { // never coalesce 8039 coalesce = false; 8040 break; 8041 } 8042 case 1: { // coalesce if left & right chunks on overpopulated lists 8043 coalesce = _sp->coalOverPopulated(left) && 8044 _sp->coalOverPopulated(right); 8045 break; 8046 } 8047 case 2: { // coalesce if left chunk on overpopulated list (default) 8048 coalesce = _sp->coalOverPopulated(left); 8049 break; 8050 } 8051 case 3: { // coalesce if left OR right chunk on overpopulated list 8052 coalesce = _sp->coalOverPopulated(left) || 8053 _sp->coalOverPopulated(right); 8054 break; 8055 } 8056 case 4: { // always coalesce 8057 coalesce = true; 8058 break; 8059 } 8060 default: 8061 ShouldNotReachHere(); 8062 } 8063 8064 // Should the current free range be coalesced? 8065 // If the chunk is in a free range and either we decided to coalesce above 8066 // or the chunk is near the large block at the end of the heap 8067 // (isNearLargestChunk() returns true), then coalesce this chunk. 8068 bool doCoalesce = inFreeRange() && 8069 (coalesce || _g->isNearLargestChunk((HeapWord*)fc)); 8070 if (doCoalesce) { 8071 // Coalesce the current free range on the left with the new 8072 // chunk on the right. If either is on a free list, 8073 // it must be removed from the list and stashed in the closure. 8074 if (freeRangeInFreeLists()) { 8075 FreeChunk* ffc = (FreeChunk*)freeFinger(); 8076 assert(ffc->size() == pointer_delta(addr, freeFinger()), 8077 "Size of free range is inconsistent with chunk size."); 8078 if (CMSTestInFreeList) { 8079 assert(_sp->verifyChunkInFreeLists(ffc), 8080 "Chunk is not in free lists"); 8081 } 8082 _sp->coalDeath(ffc->size()); 8083 _sp->removeFreeChunkFromFreeLists(ffc); 8084 set_freeRangeInFreeLists(false); 8085 } 8086 if (fcInFreeLists) { 8087 _sp->coalDeath(chunkSize); 8088 assert(fc->size() == chunkSize, 8089 "The chunk has the wrong size or is not in the free lists"); 8090 _sp->removeFreeChunkFromFreeLists(fc); 8091 } 8092 set_lastFreeRangeCoalesced(true); 8093 } else { // not in a free range and/or should not coalesce 8094 // Return the current free range and start a new one. 8095 if (inFreeRange()) { 8096 // In a free range but cannot coalesce with the right hand chunk. 8097 // Put the current free range into the free lists. 8098 flushCurFreeChunk(freeFinger(), 8099 pointer_delta(addr, freeFinger())); 8100 } 8101 // Set up for new free range. Pass along whether the right hand 8102 // chunk is in the free lists. 
8103 initialize_free_range((HeapWord*)fc, fcInFreeLists); 8104 } 8105 } 8106 void SweepClosure::flushCurFreeChunk(HeapWord* chunk, size_t size) { 8107 assert(inFreeRange(), "Should only be called if currently in a free range."); 8108 assert(size > 0, 8109 "A zero sized chunk cannot be added to the free lists."); 8110 if (!freeRangeInFreeLists()) { 8111 if(CMSTestInFreeList) { 8112 FreeChunk* fc = (FreeChunk*) chunk; 8113 fc->setSize(size); 8114 assert(!_sp->verifyChunkInFreeLists(fc), 8115 "chunk should not be in free lists yet"); 8116 } 8117 if (CMSTraceSweeper) { 8118 gclog_or_tty->print_cr(" -- add free block 0x%x (%d) to free lists", 8119 chunk, size); 8120 } 8121 // A new free range is going to be starting. The current 8122 // free range has not been added to the free lists yet or 8123 // was removed so add it back. 8124 // If the current free range was coalesced, then the death 8125 // of the free range was recorded. Record a birth now. 8126 if (lastFreeRangeCoalesced()) { 8127 _sp->coalBirth(size); 8128 } 8129 _sp->addChunkAndRepairOffsetTable(chunk, size, 8130 lastFreeRangeCoalesced()); 8131 } 8132 set_inFreeRange(false); 8133 set_freeRangeInFreeLists(false); 8134 } 8135 8136 // We take a break if we've been at this for a while, 8137 // so as to avoid monopolizing the locks involved. 8138 void SweepClosure::do_yield_work(HeapWord* addr) { 8139 // Return current free chunk being used for coalescing (if any) 8140 // to the appropriate freelist. After yielding, the next 8141 // free block encountered will start a coalescing range of 8142 // free blocks. If the next free block is adjacent to the 8143 // chunk just flushed, they will need to wait for the next 8144 // sweep to be coalesced. 8145 if (inFreeRange()) { 8146 flushCurFreeChunk(freeFinger(), pointer_delta(addr, freeFinger())); 8147 } 8148 8149 // First give up the locks, then yield, then re-lock. 8150 // We should probably use a constructor/destructor idiom to 8151 // do this unlock/lock or modify the MutexUnlocker class to 8152 // serve our purpose. XXX 8153 assert_lock_strong(_bitMap->lock()); 8154 assert_lock_strong(_freelistLock); 8155 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), 8156 "CMS thread should hold CMS token"); 8157 _bitMap->lock()->unlock(); 8158 _freelistLock->unlock(); 8159 ConcurrentMarkSweepThread::desynchronize(true); 8160 ConcurrentMarkSweepThread::acknowledge_yield_request(); 8161 _collector->stopTimer(); 8162 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr()); 8163 if (PrintCMSStatistics != 0) { 8164 _collector->incrementYields(); 8165 } 8166 _collector->icms_wait(); 8167 8168 // See the comment in coordinator_yield() 8169 for (unsigned i = 0; i < CMSYieldSleepCount && 8170 ConcurrentMarkSweepThread::should_yield() && 8171 !CMSCollector::foregroundGCIsActive(); ++i) { 8172 os::sleep(Thread::current(), 1, false); 8173 ConcurrentMarkSweepThread::acknowledge_yield_request(); 8174 } 8175 8176 ConcurrentMarkSweepThread::synchronize(true); 8177 _freelistLock->lock(); 8178 _bitMap->lock()->lock_without_safepoint_check(); 8179 _collector->startTimer(); 8180 } 8181 8182 #ifndef PRODUCT 8183 // This is actually very useful in a product build if it can 8184 // be called from the debugger. Compile it into the product 8185 // as needed. 
8186 bool debug_verifyChunkInFreeLists(FreeChunk* fc) { 8187 return debug_cms_space->verifyChunkInFreeLists(fc); 8188 } 8189 8190 void SweepClosure::record_free_block_coalesced(FreeChunk* fc) const { 8191 if (CMSTraceSweeper) { 8192 gclog_or_tty->print("Sweep:coal_free_blk 0x%x (%d)\n", fc, fc->size()); 8193 } 8194 } 8195 #endif 8196 8197 // CMSIsAliveClosure 8198 bool CMSIsAliveClosure::do_object_b(oop obj) { 8199 HeapWord* addr = (HeapWord*)obj; 8200 return addr != NULL && 8201 (!_span.contains(addr) || _bit_map->isMarked(addr)); 8202 } 8203 8204 // CMSKeepAliveClosure: the serial version 8205 void CMSKeepAliveClosure::do_oop(oop* p) { 8206 oop this_oop = *p; 8207 HeapWord* addr = (HeapWord*)this_oop; 8208 if (_span.contains(addr) && 8209 !_bit_map->isMarked(addr)) { 8210 _bit_map->mark(addr); 8211 bool simulate_overflow = false; 8212 NOT_PRODUCT( 8213 if (CMSMarkStackOverflowALot && 8214 _collector->simulate_overflow()) { 8215 // simulate a stack overflow 8216 simulate_overflow = true; 8217 } 8218 ) 8219 if (simulate_overflow || !_mark_stack->push(this_oop)) { 8220 _collector->push_on_overflow_list(this_oop); 8221 _collector->_ser_kac_ovflw++; 8222 } 8223 } 8224 } 8225 8226 // CMSParKeepAliveClosure: a parallel version of the above. 8227 // The work queues are private to each closure (thread), 8228 // but (may be) available for stealing by other threads. 8229 void CMSParKeepAliveClosure::do_oop(oop* p) { 8230 oop this_oop = *p; 8231 HeapWord* addr = (HeapWord*)this_oop; 8232 if (_span.contains(addr) && 8233 !_bit_map->isMarked(addr)) { 8234 // In general, during recursive tracing, several threads 8235 // may be concurrently getting here; the first one to 8236 // "tag" it, claims it. 8237 if (_bit_map->par_mark(addr)) { 8238 bool res = _work_queue->push(this_oop); 8239 assert(res, "Low water mark should be much less than capacity"); 8240 // Do a recursive trim in the hope that this will keep 8241 // stack usage lower, but leave some oops for potential stealers 8242 trim_queue(_low_water_mark); 8243 } // Else, another thread got there first 8244 } 8245 } 8246 8247 void CMSParKeepAliveClosure::trim_queue(uint max) { 8248 while (_work_queue->size() > max) { 8249 oop new_oop; 8250 if (_work_queue->pop_local(new_oop)) { 8251 assert(new_oop != NULL && new_oop->is_oop(), "Expected an oop"); 8252 assert(_bit_map->isMarked((HeapWord*)new_oop), 8253 "no white objects on this stack!"); 8254 assert(_span.contains((HeapWord*)new_oop), "Out of bounds oop"); 8255 // iterate over the oops in this oop, marking and pushing 8256 // the ones in CMS heap (i.e. in _span). 
8257 new_oop->oop_iterate(&_mark_and_push); 8258 } 8259 } 8260 } 8261 8262 void CMSInnerParMarkAndPushClosure::do_oop(oop* p) { 8263 oop this_oop = *p; 8264 HeapWord* addr = (HeapWord*)this_oop; 8265 if (_span.contains(addr) && 8266 !_bit_map->isMarked(addr)) { 8267 if (_bit_map->par_mark(addr)) { 8268 bool simulate_overflow = false; 8269 NOT_PRODUCT( 8270 if (CMSMarkStackOverflowALot && 8271 _collector->par_simulate_overflow()) { 8272 // simulate a stack overflow 8273 simulate_overflow = true; 8274 } 8275 ) 8276 if (simulate_overflow || !_work_queue->push(this_oop)) { 8277 _collector->par_push_on_overflow_list(this_oop); 8278 _collector->_par_kac_ovflw++; 8279 } 8280 } // Else another thread got there already 8281 } 8282 } 8283 8284 ////////////////////////////////////////////////////////////////// 8285 // CMSExpansionCause ///////////////////////////// 8286 ////////////////////////////////////////////////////////////////// 8287 const char* CMSExpansionCause::to_string(CMSExpansionCause::Cause cause) { 8288 switch (cause) { 8289 case _no_expansion: 8290 return "No expansion"; 8291 case _satisfy_free_ratio: 8292 return "Free ratio"; 8293 case _satisfy_promotion: 8294 return "Satisfy promotion"; 8295 case _satisfy_allocation: 8296 return "allocation"; 8297 case _allocate_par_lab: 8298 return "Par LAB"; 8299 case _allocate_par_spooling_space: 8300 return "Par Spooling Space"; 8301 case _adaptive_size_policy: 8302 return "Ergonomics"; 8303 default: 8304 return "unknown"; 8305 } 8306 } 8307 8308 void CMSDrainMarkingStackClosure::do_void() { 8309 // the max number to take from overflow list at a time 8310 const size_t num = _mark_stack->capacity()/4; 8311 while (!_mark_stack->isEmpty() || 8312 // if stack is empty, check the overflow list 8313 _collector->take_from_overflow_list(num, _mark_stack)) { 8314 oop this_oop = _mark_stack->pop(); 8315 HeapWord* addr = (HeapWord*)this_oop; 8316 assert(_span.contains(addr), "Should be within span"); 8317 assert(_bit_map->isMarked(addr), "Should be marked"); 8318 assert(this_oop->is_oop(), "Should be an oop"); 8319 this_oop->oop_iterate(_keep_alive); 8320 } 8321 } 8322 8323 void CMSParDrainMarkingStackClosure::do_void() { 8324 // drain queue 8325 trim_queue(0); 8326 } 8327 8328 // Trim our work_queue so its length is below max at return 8329 void CMSParDrainMarkingStackClosure::trim_queue(uint max) { 8330 while (_work_queue->size() > max) { 8331 oop new_oop; 8332 if (_work_queue->pop_local(new_oop)) { 8333 assert(new_oop->is_oop(), "Expected an oop"); 8334 assert(_bit_map->isMarked((HeapWord*)new_oop), 8335 "no white objects on this stack!"); 8336 assert(_span.contains((HeapWord*)new_oop), "Out of bounds oop"); 8337 // iterate over the oops in this oop, marking and pushing 8338 // the ones in CMS heap (i.e. in _span). 8339 new_oop->oop_iterate(&_mark_and_push); 8340 } 8341 } 8342 } 8343 8344 //////////////////////////////////////////////////////////////////// 8345 // Support for Marking Stack Overflow list handling and related code 8346 //////////////////////////////////////////////////////////////////// 8347 // Much of the following code is similar in shape and spirit to the 8348 // code used in ParNewGC. We should try and share that code 8349 // as much as possible in the future. 
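// A note on representation (a sketch of what the code below does): the
// overflow list is an intrusive, singly-linked list threaded through the
// objects' own mark words. The global head is _overflow_list, each
// element's mark word holds (reinterpreted as a markOop) the next element,
// and a NULL mark word terminates the list; mark words that need to
// survive this hijacking are saved on the side (see
// preserve_mark_if_necessary() further below). In outline:
//
//   push(p):  p->set_mark((markOop)_overflow_list); _overflow_list = p;
//   pop():    oop p = _overflow_list;
//             _overflow_list = oop(p->mark());
//             p->set_mark(markOopDesc::prototype());  // restore a clean mark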
8350
8351 #ifndef PRODUCT
8352 // Debugging support for CMSStackOverflowALot
8353
8354 // It's OK to call this multi-threaded; the worst thing
8355 // that can happen is that we'll get a bunch of closely
8356 // spaced simulated overflows, but that's OK, in fact
8357 // probably good as it would exercise the overflow code
8358 // under contention.
8359 bool CMSCollector::simulate_overflow() {
8360 if (_overflow_counter-- <= 0) { // just being defensive
8361 _overflow_counter = CMSMarkStackOverflowInterval;
8362 return true;
8363 } else {
8364 return false;
8365 }
8366 }
8367
8368 bool CMSCollector::par_simulate_overflow() {
8369 return simulate_overflow();
8370 }
8371 #endif
8372
8373 // Single-threaded
8374 bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
8375 assert(stack->isEmpty(), "Expected precondition");
8376 assert(stack->capacity() > num, "Shouldn't bite more than can chew");
8377 size_t i = num;
8378 oop cur = _overflow_list;
8379 const markOop proto = markOopDesc::prototype();
8380 NOT_PRODUCT(size_t n = 0;)
8381 for (oop next; i > 0 && cur != NULL; cur = next, i--) {
8382 next = oop(cur->mark());
8383 cur->set_mark(proto); // until proven otherwise
8384 assert(cur->is_oop(), "Should be an oop");
8385 bool res = stack->push(cur);
8386 assert(res, "Bit off more than can chew?");
8387 NOT_PRODUCT(n++;)
8388 }
8389 _overflow_list = cur;
8390 #ifndef PRODUCT
8391 assert(_num_par_pushes >= n, "Too many pops?");
8392 _num_par_pushes -= n;
8393 #endif
8394 return !stack->isEmpty();
8395 }
8396
8397 // Multi-threaded; use CAS to break off a prefix
8398 bool CMSCollector::par_take_from_overflow_list(size_t num,
8399 OopTaskQueue* work_q) {
8400 assert(work_q->size() == 0, "That's the current policy");
8401 assert(num < work_q->max_elems(), "Can't bite more than we can chew");
8402 if (_overflow_list == NULL) {
8403 return false;
8404 }
8405 // Grab the entire list; we'll put back a suffix
8406 oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
8407 if (prefix == NULL) { // someone grabbed it before we did ...
8408 // ... we could spin for a short while, but for now we don't
8409 return false;
8410 }
8411 size_t i = num;
8412 oop cur = prefix;
8413 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
8414 if (cur->mark() != NULL) {
8415 oop suffix_head = cur->mark(); // suffix will be put back on global list
8416 cur->set_mark(NULL); // break off suffix
8417 // Find tail of suffix so we can prepend suffix to global list
8418 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
8419 oop suffix_tail = cur;
8420 assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
8421 "Tautology");
8422 oop observed_overflow_list = _overflow_list;
8423 do {
8424 cur = observed_overflow_list;
8425 suffix_tail->set_mark(markOop(cur));
8426 observed_overflow_list =
8427 (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur);
8428 } while (cur != observed_overflow_list);
8429 }
8430
8431 // Push the prefix elements on work_q
8432 assert(prefix != NULL, "control point invariant");
8433 const markOop proto = markOopDesc::prototype();
8434 oop next;
8435 NOT_PRODUCT(size_t n = 0;)
8436 for (cur = prefix; cur != NULL; cur = next) {
8437 next = oop(cur->mark());
8438 cur->set_mark(proto); // until proven otherwise
8439 assert(cur->is_oop(), "Should be an oop");
8440 bool res = work_q->push(cur);
8441 assert(res, "Bit off more than we can chew?");
8442 NOT_PRODUCT(n++;)
8443 }
8444 #ifndef PRODUCT
8445 assert(_num_par_pushes >= n, "Too many pops?");
8446 Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
8447 #endif
8448 return true;
8449 }
8450
8451 // Single-threaded
8452 void CMSCollector::push_on_overflow_list(oop p) {
8453 NOT_PRODUCT(_num_par_pushes++;)
8454 assert(p->is_oop(), "Not an oop");
8455 preserve_mark_if_necessary(p);
8456 p->set_mark((markOop)_overflow_list);
8457 _overflow_list = p;
8458 }
8459
8460 // Multi-threaded; use CAS to prepend to overflow list
8461 void CMSCollector::par_push_on_overflow_list(oop p) {
8462 NOT_PRODUCT(Atomic::inc_ptr(&_num_par_pushes);)
8463 assert(p->is_oop(), "Not an oop");
8464 par_preserve_mark_if_necessary(p);
8465 oop observed_overflow_list = _overflow_list;
8466 oop cur_overflow_list;
8467 do {
8468 cur_overflow_list = observed_overflow_list;
8469 p->set_mark(markOop(cur_overflow_list));
8470 observed_overflow_list =
8471 (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
8472 } while (cur_overflow_list != observed_overflow_list);
8473 }
8474
8475 // Single threaded
8476 // General Note on GrowableArray: pushes may silently fail
8477 // because we are (temporarily) out of C-heap for expanding
8478 // the stack. The problem is quite ubiquitous and affects
8479 // a lot of code in the JVM. The prudent thing for GrowableArray
8480 // to do (for now) is to exit with an error. However, that may
8481 // be too draconian in some cases because the caller may be
8482 // able to recover without much harm. For such cases, we
8483 // should probably introduce a "soft_push" method which returns
8484 // an indication of success or failure with the assumption that
8485 // the caller may be able to recover from a failure; code in
8486 // the VM can then be changed, incrementally, to deal with such
8487 // failures where possible, thus, incrementally hardening the VM
8488 // in such low resource situations.
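// A sketch of what such a "soft_push" might look like (hypothetical: it is
// not part of today's GrowableArray, and try_grow() stands in for an
// assumed variant of grow() that reports allocation failure instead of
// exiting the VM):
//
//   template <class E> bool GrowableArray<E>::soft_push(const E& elem) {
//     if (_len == _max && !try_grow(_len)) {
//       return false;                // let the caller decide how to recover
//     }
//     _data[_len++] = elem;
//     return true;
//   }
//
// A caller could then attempt to degrade gracefully rather than bringing
// the whole VM down.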
8489 void CMSCollector::preserve_mark_work(oop p, markOop m) { 8490 int PreserveMarkStackSize = 128; 8491 8492 if (_preserved_oop_stack == NULL) { 8493 assert(_preserved_mark_stack == NULL, 8494 "bijection with preserved_oop_stack"); 8495 // Allocate the stacks 8496 _preserved_oop_stack = new (ResourceObj::C_HEAP) 8497 GrowableArray<oop>(PreserveMarkStackSize, true); 8498 _preserved_mark_stack = new (ResourceObj::C_HEAP) 8499 GrowableArray<markOop>(PreserveMarkStackSize, true); 8500 if (_preserved_oop_stack == NULL || _preserved_mark_stack == NULL) { 8501 vm_exit_out_of_memory(2* PreserveMarkStackSize * sizeof(oop) /* punt */, 8502 "Preserved Mark/Oop Stack for CMS (C-heap)"); 8503 } 8504 } 8505 _preserved_oop_stack->push(p); 8506 _preserved_mark_stack->push(m); 8507 assert(m == p->mark(), "Mark word changed"); 8508 assert(_preserved_oop_stack->length() == _preserved_mark_stack->length(), 8509 "bijection"); 8510 } 8511 8512 // Single threaded 8513 void CMSCollector::preserve_mark_if_necessary(oop p) { 8514 markOop m = p->mark(); 8515 if (m->must_be_preserved(p)) { 8516 preserve_mark_work(p, m); 8517 } 8518 } 8519 8520 void CMSCollector::par_preserve_mark_if_necessary(oop p) { 8521 markOop m = p->mark(); 8522 if (m->must_be_preserved(p)) { 8523 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 8524 // Even though we read the mark word without holding 8525 // the lock, we are assured that it will not change 8526 // because we "own" this oop, so no other thread can 8527 // be trying to push it on the overflow list; see 8528 // the assertion in preserve_mark_work() that checks 8529 // that m == p->mark(). 8530 preserve_mark_work(p, m); 8531 } 8532 } 8533 8534 // We should be able to do this multi-threaded, 8535 // a chunk of stack being a task (this is 8536 // correct because each oop only ever appears 8537 // once in the overflow list. However, it's 8538 // not very easy to completely overlap this with 8539 // other operations, so will generally not be done 8540 // until all work's been completed. Because we 8541 // expect the preserved oop stack (set) to be small, 8542 // it's probably fine to do this single-threaded. 8543 // We can explore cleverer concurrent/overlapped/parallel 8544 // processing of preserved marks if we feel the 8545 // need for this in the future. Stack overflow should 8546 // be so rare in practice and, when it happens, its 8547 // effect on performance so great that this will 8548 // likely just be in the noise anyway. 
8549 void CMSCollector::restore_preserved_marks_if_any() { 8550 if (_preserved_oop_stack == NULL) { 8551 assert(_preserved_mark_stack == NULL, 8552 "bijection with preserved_oop_stack"); 8553 return; 8554 } 8555 8556 assert(SafepointSynchronize::is_at_safepoint(), 8557 "world should be stopped"); 8558 assert(Thread::current()->is_ConcurrentGC_thread() || 8559 Thread::current()->is_VM_thread(), 8560 "should be single-threaded"); 8561 8562 int length = _preserved_oop_stack->length(); 8563 assert(_preserved_mark_stack->length() == length, "bijection"); 8564 for (int i = 0; i < length; i++) { 8565 oop p = _preserved_oop_stack->at(i); 8566 assert(p->is_oop(), "Should be an oop"); 8567 assert(_span.contains(p), "oop should be in _span"); 8568 assert(p->mark() == markOopDesc::prototype(), 8569 "Set when taken from overflow list"); 8570 markOop m = _preserved_mark_stack->at(i); 8571 p->set_mark(m); 8572 } 8573 _preserved_mark_stack->clear(); 8574 _preserved_oop_stack->clear(); 8575 assert(_preserved_mark_stack->is_empty() && 8576 _preserved_oop_stack->is_empty(), 8577 "stacks were cleared above"); 8578 } 8579 8580 #ifndef PRODUCT 8581 bool CMSCollector::no_preserved_marks() const { 8582 return ( ( _preserved_mark_stack == NULL 8583 && _preserved_oop_stack == NULL) 8584 || ( _preserved_mark_stack->is_empty() 8585 && _preserved_oop_stack->is_empty())); 8586 } 8587 #endif 8588 8589 CMSAdaptiveSizePolicy* ASConcurrentMarkSweepGeneration::cms_size_policy() const 8590 { 8591 GenCollectedHeap* gch = (GenCollectedHeap*) GenCollectedHeap::heap(); 8592 CMSAdaptiveSizePolicy* size_policy = 8593 (CMSAdaptiveSizePolicy*) gch->gen_policy()->size_policy(); 8594 assert(size_policy->is_gc_cms_adaptive_size_policy(), 8595 "Wrong type for size policy"); 8596 return size_policy; 8597 } 8598 8599 void ASConcurrentMarkSweepGeneration::resize(size_t cur_promo_size, 8600 size_t desired_promo_size) { 8601 if (cur_promo_size < desired_promo_size) { 8602 size_t expand_bytes = desired_promo_size - cur_promo_size; 8603 if (PrintAdaptiveSizePolicy && Verbose) { 8604 gclog_or_tty->print_cr(" ASConcurrentMarkSweepGeneration::resize " 8605 "Expanding tenured generation by " SIZE_FORMAT " (bytes)", 8606 expand_bytes); 8607 } 8608 expand(expand_bytes, 8609 MinHeapDeltaBytes, 8610 CMSExpansionCause::_adaptive_size_policy); 8611 } else if (desired_promo_size < cur_promo_size) { 8612 size_t shrink_bytes = cur_promo_size - desired_promo_size; 8613 if (PrintAdaptiveSizePolicy && Verbose) { 8614 gclog_or_tty->print_cr(" ASConcurrentMarkSweepGeneration::resize " 8615 "Shrinking tenured generation by " SIZE_FORMAT " (bytes)", 8616 shrink_bytes); 8617 } 8618 shrink(shrink_bytes); 8619 } 8620 } 8621 8622 CMSGCAdaptivePolicyCounters* ASConcurrentMarkSweepGeneration::gc_adaptive_policy_counters() { 8623 GenCollectedHeap* gch = GenCollectedHeap::heap(); 8624 CMSGCAdaptivePolicyCounters* counters = 8625 (CMSGCAdaptivePolicyCounters*) gch->collector_policy()->counters(); 8626 assert(counters->kind() == GCPolicyCounters::CMSGCAdaptivePolicyCountersKind, 8627 "Wrong kind of counters"); 8628 return counters; 8629 } 8630 8631 8632 void ASConcurrentMarkSweepGeneration::update_counters() { 8633 if (UsePerfData) { 8634 _space_counters->update_all(); 8635 _gen_counters->update_all(); 8636 CMSGCAdaptivePolicyCounters* counters = gc_adaptive_policy_counters(); 8637 GenCollectedHeap* gch = GenCollectedHeap::heap(); 8638 CMSGCStats* gc_stats_l = (CMSGCStats*) gc_stats(); 8639 assert(gc_stats_l->kind() == GCStats::CMSGCStatsKind, 8640 "Wrong gc statistics 
type"); 8641 counters->update_counters(gc_stats_l); 8642 } 8643 } 8644 8645 void ASConcurrentMarkSweepGeneration::update_counters(size_t used) { 8646 if (UsePerfData) { 8647 _space_counters->update_used(used); 8648 _space_counters->update_capacity(); 8649 _gen_counters->update_all(); 8650 8651 CMSGCAdaptivePolicyCounters* counters = gc_adaptive_policy_counters(); 8652 GenCollectedHeap* gch = GenCollectedHeap::heap(); 8653 CMSGCStats* gc_stats_l = (CMSGCStats*) gc_stats(); 8654 assert(gc_stats_l->kind() == GCStats::CMSGCStatsKind, 8655 "Wrong gc statistics type"); 8656 counters->update_counters(gc_stats_l); 8657 } 8658 } 8659 8660 // The desired expansion delta is computed so that: 8661 // . desired free percentage or greater is used 8662 void ASConcurrentMarkSweepGeneration::compute_new_size() { 8663 assert_locked_or_safepoint(Heap_lock); 8664 8665 GenCollectedHeap* gch = (GenCollectedHeap*) GenCollectedHeap::heap(); 8666 8667 // If incremental collection failed, we just want to expand 8668 // to the limit. 8669 if (incremental_collection_failed()) { 8670 clear_incremental_collection_failed(); 8671 grow_to_reserved(); 8672 return; 8673 } 8674 8675 assert(UseAdaptiveSizePolicy, "Should be using adaptive sizing"); 8676 8677 assert(gch->kind() == CollectedHeap::GenCollectedHeap, 8678 "Wrong type of heap"); 8679 int prev_level = level() - 1; 8680 assert(prev_level >= 0, "The cms generation is the lowest generation"); 8681 Generation* prev_gen = gch->get_gen(prev_level); 8682 assert(prev_gen->kind() == Generation::ASParNew, 8683 "Wrong type of young generation"); 8684 ParNewGeneration* younger_gen = (ParNewGeneration*) prev_gen; 8685 size_t cur_eden = younger_gen->eden()->capacity(); 8686 CMSAdaptiveSizePolicy* size_policy = cms_size_policy(); 8687 size_t cur_promo = free(); 8688 size_policy->compute_tenured_generation_free_space(cur_promo, 8689 max_available(), 8690 cur_eden); 8691 resize(cur_promo, size_policy->promo_size()); 8692 8693 // Record the new size of the space in the cms generation 8694 // that is available for promotions. This is temporary. 8695 // It should be the desired promo size. 8696 size_policy->avg_cms_promo()->sample(free()); 8697 size_policy->avg_old_live()->sample(used()); 8698 8699 if (UsePerfData) { 8700 CMSGCAdaptivePolicyCounters* counters = gc_adaptive_policy_counters(); 8701 counters->update_cms_capacity_counter(capacity()); 8702 } 8703 } 8704 8705 void ASConcurrentMarkSweepGeneration::shrink_by(size_t desired_bytes) { 8706 assert_locked_or_safepoint(Heap_lock); 8707 assert_lock_strong(freelistLock()); 8708 HeapWord* old_end = _cmsSpace->end(); 8709 HeapWord* unallocated_start = _cmsSpace->unallocated_block(); 8710 assert(old_end >= unallocated_start, "Miscalculation of unallocated_start"); 8711 FreeChunk* chunk_at_end = find_chunk_at_end(); 8712 if (chunk_at_end == NULL) { 8713 // No room to shrink 8714 if (PrintGCDetails && Verbose) { 8715 gclog_or_tty->print_cr("No room to shrink: old_end " 8716 PTR_FORMAT " unallocated_start " PTR_FORMAT 8717 " chunk_at_end " PTR_FORMAT, 8718 old_end, unallocated_start, chunk_at_end); 8719 } 8720 return; 8721 } else { 8722 8723 // Find the chunk at the end of the space and determine 8724 // how much it can be shrunk. 
8725 size_t shrinkable_size_in_bytes = chunk_at_end->size(); 8726 size_t aligned_shrinkable_size_in_bytes = 8727 align_size_down(shrinkable_size_in_bytes, os::vm_page_size()); 8728 assert(unallocated_start <= chunk_at_end->end(), 8729 "Inconsistent chunk at end of space"); 8730 size_t bytes = MIN2(desired_bytes, aligned_shrinkable_size_in_bytes); 8731 size_t word_size_before = heap_word_size(_virtual_space.committed_size()); 8732 8733 // Shrink the underlying space 8734 _virtual_space.shrink_by(bytes); 8735 if (PrintGCDetails && Verbose) { 8736 gclog_or_tty->print_cr("ConcurrentMarkSweepGeneration::shrink_by:" 8737 " desired_bytes " SIZE_FORMAT 8738 " shrinkable_size_in_bytes " SIZE_FORMAT 8739 " aligned_shrinkable_size_in_bytes " SIZE_FORMAT 8740 " bytes " SIZE_FORMAT, 8741 desired_bytes, shrinkable_size_in_bytes, 8742 aligned_shrinkable_size_in_bytes, bytes); 8743 gclog_or_tty->print_cr(" old_end " SIZE_FORMAT 8744 " unallocated_start " SIZE_FORMAT, 8745 old_end, unallocated_start); 8746 } 8747 8748 // If the space did shrink (shrinking is not guaranteed), 8749 // shrink the chunk at the end by the appropriate amount. 8750 if (((HeapWord*)_virtual_space.high()) < old_end) { 8751 size_t new_word_size = 8752 heap_word_size(_virtual_space.committed_size()); 8753 8754 // Have to remove the chunk from the dictionary because it is changing 8755 // size and might be someplace elsewhere in the dictionary. 8756 8757 // Get the chunk at end, shrink it, and put it 8758 // back. 8759 _cmsSpace->removeChunkFromDictionary(chunk_at_end); 8760 size_t word_size_change = word_size_before - new_word_size; 8761 size_t chunk_at_end_old_size = chunk_at_end->size(); 8762 assert(chunk_at_end_old_size >= word_size_change, 8763 "Shrink is too large"); 8764 chunk_at_end->setSize(chunk_at_end_old_size - 8765 word_size_change); 8766 _cmsSpace->freed((HeapWord*) chunk_at_end->end(), 8767 word_size_change); 8768 8769 _cmsSpace->returnChunkToDictionary(chunk_at_end); 8770 8771 MemRegion mr(_cmsSpace->bottom(), new_word_size); 8772 _bts->resize(new_word_size); // resize the block offset shared array 8773 Universe::heap()->barrier_set()->resize_covered_region(mr); 8774 _cmsSpace->assert_locked(); 8775 _cmsSpace->set_end((HeapWord*)_virtual_space.high()); 8776 8777 NOT_PRODUCT(_cmsSpace->dictionary()->verify()); 8778 8779 // update the space and generation capacity counters 8780 if (UsePerfData) { 8781 _space_counters->update_capacity(); 8782 _gen_counters->update_all(); 8783 } 8784 8785 if (Verbose && PrintGCDetails) { 8786 size_t new_mem_size = _virtual_space.committed_size(); 8787 size_t old_mem_size = new_mem_size + bytes; 8788 gclog_or_tty->print_cr("Shrinking %s from %ldK by %ldK to %ldK", 8789 name(), old_mem_size/K, bytes/K, new_mem_size/K); 8790 } 8791 } 8792 8793 assert(_cmsSpace->unallocated_block() <= _cmsSpace->end(), 8794 "Inconsistency at end of space"); 8795 assert(chunk_at_end->end() == _cmsSpace->end(), 8796 "Shrinking is inconsistent"); 8797 return; 8798 } 8799 } 8800 8801 // Transfer some number of overflown objects to usual marking 8802 // stack. Return true if some objects were transferred. 
8803 bool MarkRefsIntoAndScanClosure::take_from_overflow_list() { 8804 size_t num = MIN2((size_t)_mark_stack->capacity()/4, 8805 (size_t)ParGCDesiredObjsFromOverflowList); 8806 8807 bool res = _collector->take_from_overflow_list(num, _mark_stack); 8808 assert(_collector->overflow_list_is_empty() || res, 8809 "If list is not empty, we should have taken something"); 8810 assert(!res || !_mark_stack->isEmpty(), 8811 "If we took something, it should now be on our stack"); 8812 return res; 8813 } 8814 8815 size_t MarkDeadObjectsClosure::do_blk(HeapWord* addr) { 8816 size_t res = _sp->block_size_no_stall(addr, _collector); 8817 assert(res != 0, "Should always be able to compute a size"); 8818 if (_sp->block_is_obj(addr)) { 8819 if (_live_bit_map->isMarked(addr)) { 8820 // It can't have been dead in a previous cycle 8821 guarantee(!_dead_bit_map->isMarked(addr), "No resurrection!"); 8822 } else { 8823 _dead_bit_map->mark(addr); // mark the dead object 8824 } 8825 } 8826 return res; 8827 }