/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/allocation.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
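  // (The bit map has one bit for every (1 << _shifter) heap words, so only
  // addresses aligned to that granularity can correspond to a mark bit.)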
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size() >> LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size() / HeapWordSize; // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflowed the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity * 2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue with the current capacity
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + 1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
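  // Claim the next n slots by bumping the index under the lock, then copy
  // the entries into the claimed slots.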
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
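  // Read the next survivor without the lock first; if it is NULL there is
  // nothing left to claim and we can avoid taking the lock altogether.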
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(log2_intptr(MinObjAlignment)),
  _markBitMap2(log2_intptr(MinObjAlignment)),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
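      // scale_parallel_threads() (see above) yields roughly a quarter of
      // ParallelGCThreads, with a minimum of one marking thread.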
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
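    // The checks below only apply when MarkStackSize was specified on the
    // command line, either against the default MarkStackSizeMax or against
    // a command-line MarkStackSizeMax.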
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions;
  // these will occur at the end of evacuation pauses, when the tasks
  // are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur, next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended by a Full GC or an evacuation
 * pause. This is actually safe, since entering the sync barrier is
 * one of the last things do_marking_step() does, and it doesn't
 * manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
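// VerifyLiveObjectDataHRClosure (below) runs this closure over the marking
// bitmap to compute the expected region/card bitmaps and marked bytes, and
// then compares those expected values against the actual ones.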
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of range is not card aligned - increment to cover
        // all the cards spanned by the range
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
1598 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1599 1600 bool expected = _exp_region_bm->at(index); 1601 bool actual = _region_bm->at(index); 1602 if (expected && !actual) { 1603 if (_verbose) { 1604 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1605 "expected: %s, actual: %s", 1606 hr->hrs_index(), 1607 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1608 } 1609 failures += 1; 1610 } 1611 1612 // Verify that the card bit maps for the cards spanned by the current 1613 // region match. We have an error if we have a set bit in the expected 1614 // bit map and the corresponding bit in the actual bitmap is not set. 1615 1616 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1617 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1618 1619 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1620 expected = _exp_card_bm->at(i); 1621 actual = _card_bm->at(i); 1622 1623 if (expected && !actual) { 1624 if (_verbose) { 1625 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1626 "expected: %s, actual: %s", 1627 hr->hrs_index(), i, 1628 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1629 } 1630 failures += 1; 1631 } 1632 } 1633 1634 if (failures > 0 && _verbose) { 1635 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1636 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1637 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), 1638 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1639 } 1640 1641 _failures += failures; 1642 1643 // We could stop iteration over the heap when we 1644 // find the first violating region by returning true. 1645 return false; 1646 } 1647 }; 1648 1649 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1650 protected: 1651 G1CollectedHeap* _g1h; 1652 ConcurrentMark* _cm; 1653 BitMap* _actual_region_bm; 1654 BitMap* _actual_card_bm; 1655 1656 uint _n_workers; 1657 1658 BitMap* _expected_region_bm; 1659 BitMap* _expected_card_bm; 1660 1661 int _failures; 1662 bool _verbose; 1663 1664 public: 1665 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1666 BitMap* region_bm, BitMap* card_bm, 1667 BitMap* expected_region_bm, BitMap* expected_card_bm) 1668 : AbstractGangTask("G1 verify final counting"), 1669 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1670 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1671 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1672 _failures(0), _verbose(false), 1673 _n_workers(0) { 1674 assert(VerifyDuringGC, "don't call this otherwise"); 1675 1676 // Use the value already set as the number of active threads 1677 // in the call to run_task(). 
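    // (A minimal sketch of the expected calling pattern, mirroring what
    //  ConcurrentMark::cleanup() below does with these tasks:
    //
    //    g1h->set_par_threads((int)n_workers);
    //    g1h->workers()->run_task(&task);    // invokes work() per worker
    //    g1h->set_par_threads(0);
    // )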
1678 if (G1CollectedHeap::use_parallel_gc_threads()) { 1679 assert( _g1h->workers()->active_workers() > 0, 1680 "Should have been previously set"); 1681 _n_workers = _g1h->workers()->active_workers(); 1682 } else { 1683 _n_workers = 1; 1684 } 1685 1686 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1687 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1688 1689 _verbose = _cm->verbose_medium(); 1690 } 1691 1692 void work(uint worker_id) { 1693 assert(worker_id < _n_workers, "invariant"); 1694 1695 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1696 _actual_region_bm, _actual_card_bm, 1697 _expected_region_bm, 1698 _expected_card_bm, 1699 _verbose); 1700 1701 if (G1CollectedHeap::use_parallel_gc_threads()) { 1702 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1703 worker_id, 1704 _n_workers, 1705 HeapRegion::VerifyCountClaimValue); 1706 } else { 1707 _g1h->heap_region_iterate(&verify_cl); 1708 } 1709 1710 Atomic::add(verify_cl.failures(), &_failures); 1711 } 1712 1713 int failures() const { return _failures; } 1714 }; 1715 1716 // Closure that finalizes the liveness counting data. 1717 // Used during the cleanup pause. 1718 // Sets the bits corresponding to the interval [NTAMS, top] 1719 // (which contains the implicitly live objects) in the 1720 // card liveness bitmap. Also sets the bit for each region, 1721 // containing live data, in the region liveness bitmap. 1722 1723 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1724 public: 1725 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1726 BitMap* region_bm, 1727 BitMap* card_bm) : 1728 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1729 1730 bool doHeapRegion(HeapRegion* hr) { 1731 1732 if (hr->continuesHumongous()) { 1733 // We will ignore these here and process them when their 1734 // associated "starts humongous" region is processed (see 1735 // set_bit_for_heap_region()). Note that we cannot rely on their 1736 // associated "starts humongous" region to have their bit set to 1737 // 1 since, due to the region chunking in the parallel region 1738 // iteration, a "continues humongous" region might be visited 1739 // before its associated "starts humongous". 1740 return false; 1741 } 1742 1743 HeapWord* ntams = hr->next_top_at_mark_start(); 1744 HeapWord* top = hr->top(); 1745 1746 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1747 1748 // Mark the allocated-since-marking portion... 1749 if (ntams < top) { 1750 // This definitely means the region has live objects. 1751 set_bit_for_region(hr); 1752 1753 // Now set the bits in the card bitmap for [ntams, top) 1754 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1755 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1756 1757 // Note: if we're looking at the last region in heap - top 1758 // could be actually just beyond the end of the heap; end_idx 1759 // will then correspond to a (non-existent) card that is also 1760 // just beyond the heap. 
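      // For example (assuming 512-byte cards): if top points 16 bytes
      // into a card, the live data in [ntams, top) spills into that
      // card, so end_idx is bumped by one below to cover it as well.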
1761 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1762 // end of object is not card aligned - increment to cover 1763 // all the cards spanned by the object 1764 end_idx += 1; 1765 } 1766 1767 assert(end_idx <= _card_bm->size(), 1768 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1769 end_idx, _card_bm->size())); 1770 assert(start_idx < _card_bm->size(), 1771 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1772 start_idx, _card_bm->size())); 1773 1774 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1775 } 1776 1777 // Set the bit for the region if it contains live data 1778 if (hr->next_marked_bytes() > 0) { 1779 set_bit_for_region(hr); 1780 } 1781 1782 return false; 1783 } 1784 }; 1785 1786 class G1ParFinalCountTask: public AbstractGangTask { 1787 protected: 1788 G1CollectedHeap* _g1h; 1789 ConcurrentMark* _cm; 1790 BitMap* _actual_region_bm; 1791 BitMap* _actual_card_bm; 1792 1793 uint _n_workers; 1794 1795 public: 1796 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1797 : AbstractGangTask("G1 final counting"), 1798 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1799 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1800 _n_workers(0) { 1801 // Use the value already set as the number of active threads 1802 // in the call to run_task(). 1803 if (G1CollectedHeap::use_parallel_gc_threads()) { 1804 assert( _g1h->workers()->active_workers() > 0, 1805 "Should have been previously set"); 1806 _n_workers = _g1h->workers()->active_workers(); 1807 } else { 1808 _n_workers = 1; 1809 } 1810 } 1811 1812 void work(uint worker_id) { 1813 assert(worker_id < _n_workers, "invariant"); 1814 1815 FinalCountDataUpdateClosure final_update_cl(_g1h, 1816 _actual_region_bm, 1817 _actual_card_bm); 1818 1819 if (G1CollectedHeap::use_parallel_gc_threads()) { 1820 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1821 worker_id, 1822 _n_workers, 1823 HeapRegion::FinalCountClaimValue); 1824 } else { 1825 _g1h->heap_region_iterate(&final_update_cl); 1826 } 1827 } 1828 }; 1829 1830 class G1ParNoteEndTask; 1831 1832 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1833 G1CollectedHeap* _g1; 1834 size_t _max_live_bytes; 1835 uint _regions_claimed; 1836 size_t _freed_bytes; 1837 FreeRegionList* _local_cleanup_list; 1838 HeapRegionSetCount _old_regions_removed; 1839 HeapRegionSetCount _humongous_regions_removed; 1840 HRRSCleanupTask* _hrrs_cleanup_task; 1841 double _claimed_region_time; 1842 double _max_region_time; 1843 1844 public: 1845 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1846 FreeRegionList* local_cleanup_list, 1847 HRRSCleanupTask* hrrs_cleanup_task) : 1848 _g1(g1), 1849 _max_live_bytes(0), _regions_claimed(0), 1850 _freed_bytes(0), 1851 _claimed_region_time(0.0), _max_region_time(0.0), 1852 _local_cleanup_list(local_cleanup_list), 1853 _old_regions_removed(), 1854 _humongous_regions_removed(), 1855 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1856 1857 size_t freed_bytes() { return _freed_bytes; } 1858 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1859 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1860 1861 bool doHeapRegion(HeapRegion *hr) { 1862 if (hr->continuesHumongous()) { 1863 return false; 1864 } 1865 // We use a claim value of zero here because all regions 1866 // were claimed with value 1 in the FinalCount task. 
1867 _g1->reset_gc_time_stamps(hr); 1868 double start = os::elapsedTime(); 1869 _regions_claimed++; 1870 hr->note_end_of_marking(); 1871 _max_live_bytes += hr->max_live_bytes(); 1872 1873 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1874 _freed_bytes += hr->used(); 1875 hr->set_containing_set(NULL); 1876 if (hr->isHumongous()) { 1877 assert(hr->startsHumongous(), "we should only see starts humongous"); 1878 _humongous_regions_removed.increment(1u, hr->capacity()); 1879 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1880 } else { 1881 _old_regions_removed.increment(1u, hr->capacity()); 1882 _g1->free_region(hr, _local_cleanup_list, true); 1883 } 1884 } else { 1885 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1886 } 1887 1888 double region_time = (os::elapsedTime() - start); 1889 _claimed_region_time += region_time; 1890 if (region_time > _max_region_time) { 1891 _max_region_time = region_time; 1892 } 1893 return false; 1894 } 1895 1896 size_t max_live_bytes() { return _max_live_bytes; } 1897 uint regions_claimed() { return _regions_claimed; } 1898 double claimed_region_time_sec() { return _claimed_region_time; } 1899 double max_region_time_sec() { return _max_region_time; } 1900 }; 1901 1902 class G1ParNoteEndTask: public AbstractGangTask { 1903 friend class G1NoteEndOfConcMarkClosure; 1904 1905 protected: 1906 G1CollectedHeap* _g1h; 1907 size_t _max_live_bytes; 1908 size_t _freed_bytes; 1909 FreeRegionList* _cleanup_list; 1910 1911 public: 1912 G1ParNoteEndTask(G1CollectedHeap* g1h, 1913 FreeRegionList* cleanup_list) : 1914 AbstractGangTask("G1 note end"), _g1h(g1h), 1915 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1916 1917 void work(uint worker_id) { 1918 double start = os::elapsedTime(); 1919 FreeRegionList local_cleanup_list("Local Cleanup List"); 1920 HRRSCleanupTask hrrs_cleanup_task; 1921 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1922 &hrrs_cleanup_task); 1923 if (G1CollectedHeap::use_parallel_gc_threads()) { 1924 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1925 _g1h->workers()->active_workers(), 1926 HeapRegion::NoteEndClaimValue); 1927 } else { 1928 _g1h->heap_region_iterate(&g1_note_end); 1929 } 1930 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1931 1932 // Now update the lists 1933 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1934 { 1935 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1936 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1937 _max_live_bytes += g1_note_end.max_live_bytes(); 1938 _freed_bytes += g1_note_end.freed_bytes(); 1939 1940 // If we iterate over the global cleanup list at the end of 1941 // cleanup to do this printing we will not guarantee to only 1942 // generate output for the newly-reclaimed regions (the list 1943 // might not be empty at the beginning of cleanup; we might 1944 // still be working on its previous contents). So we do the 1945 // printing here, before we append the new regions to the global 1946 // cleanup list. 
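      // (The regions this worker just freed are still on
      //  local_cleanup_list here, so the printing below covers exactly
      //  those regions and nothing else.)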
1947 1948 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1949 if (hr_printer->is_active()) { 1950 FreeRegionListIterator iter(&local_cleanup_list); 1951 while (iter.more_available()) { 1952 HeapRegion* hr = iter.get_next(); 1953 hr_printer->cleanup(hr); 1954 } 1955 } 1956 1957 _cleanup_list->add_ordered(&local_cleanup_list); 1958 assert(local_cleanup_list.is_empty(), "post-condition"); 1959 1960 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1961 } 1962 } 1963 size_t max_live_bytes() { return _max_live_bytes; } 1964 size_t freed_bytes() { return _freed_bytes; } 1965 }; 1966 1967 class G1ParScrubRemSetTask: public AbstractGangTask { 1968 protected: 1969 G1RemSet* _g1rs; 1970 BitMap* _region_bm; 1971 BitMap* _card_bm; 1972 public: 1973 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1974 BitMap* region_bm, BitMap* card_bm) : 1975 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1976 _region_bm(region_bm), _card_bm(card_bm) { } 1977 1978 void work(uint worker_id) { 1979 if (G1CollectedHeap::use_parallel_gc_threads()) { 1980 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1981 HeapRegion::ScrubRemSetClaimValue); 1982 } else { 1983 _g1rs->scrub(_region_bm, _card_bm); 1984 } 1985 } 1986 1987 }; 1988 1989 void ConcurrentMark::cleanup() { 1990 // world is stopped at this checkpoint 1991 assert(SafepointSynchronize::is_at_safepoint(), 1992 "world should be stopped"); 1993 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1994 1995 // If a full collection has happened, we shouldn't do this. 1996 if (has_aborted()) { 1997 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1998 return; 1999 } 2000 2001 g1h->verify_region_sets_optional(); 2002 2003 if (VerifyDuringGC) { 2004 HandleMark hm; // handle scope 2005 Universe::heap()->prepare_for_verify(); 2006 Universe::verify(VerifyOption_G1UsePrevMarking, 2007 " VerifyDuringGC:(before)"); 2008 } 2009 g1h->check_bitmaps("Cleanup Start"); 2010 2011 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2012 g1p->record_concurrent_mark_cleanup_start(); 2013 2014 double start = os::elapsedTime(); 2015 2016 HeapRegionRemSet::reset_for_cleanup_tasks(); 2017 2018 uint n_workers; 2019 2020 // Do counting once more with the world stopped for good measure. 2021 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2022 2023 if (G1CollectedHeap::use_parallel_gc_threads()) { 2024 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2025 "sanity check"); 2026 2027 g1h->set_par_threads(); 2028 n_workers = g1h->n_par_threads(); 2029 assert(g1h->n_par_threads() == n_workers, 2030 "Should not have been reset"); 2031 g1h->workers()->run_task(&g1_par_count_task); 2032 // Done with the parallel phase so reset to 0. 2033 g1h->set_par_threads(0); 2034 2035 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2036 "sanity check"); 2037 } else { 2038 n_workers = 1; 2039 g1_par_count_task.work(0); 2040 } 2041 2042 if (VerifyDuringGC) { 2043 // Verify that the counting data accumulated during marking matches 2044 // that calculated by walking the marking bitmap. 
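    // The verification recomputes the counting data from scratch into
    // the "expected" bitmaps below and then compares them, one-way,
    // against the actual bitmaps (see VerifyLiveObjectDataHRClosure
    // above).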
2045
2046     // Bitmaps to hold expected values
2047     BitMap expected_region_bm(_region_bm.size(), true);
2048     BitMap expected_card_bm(_card_bm.size(), true);
2049
2050     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2051                                                  &_region_bm,
2052                                                  &_card_bm,
2053                                                  &expected_region_bm,
2054                                                  &expected_card_bm);
2055
2056     if (G1CollectedHeap::use_parallel_gc_threads()) {
2057       g1h->set_par_threads((int)n_workers);
2058       g1h->workers()->run_task(&g1_par_verify_task);
2059       // Done with the parallel phase so reset to 0.
2060       g1h->set_par_threads(0);
2061
2062       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2063              "sanity check");
2064     } else {
2065       g1_par_verify_task.work(0);
2066     }
2067
2068     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2069   }
2070
2071   size_t start_used_bytes = g1h->used();
2072   g1h->set_marking_complete();
2073
2074   double count_end = os::elapsedTime();
2075   double this_final_counting_time = (count_end - start);
2076   _total_counting_time += this_final_counting_time;
2077
2078   if (G1PrintRegionLivenessInfo) {
2079     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2080     _g1h->heap_region_iterate(&cl);
2081   }
2082
2083   // Install newly created mark bitmap as "prev".
2084   swapMarkBitMaps();
2085
2086   g1h->reset_gc_time_stamp();
2087
2088   // Note end of marking in all heap regions.
2089   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2090   if (G1CollectedHeap::use_parallel_gc_threads()) {
2091     g1h->set_par_threads((int)n_workers);
2092     g1h->workers()->run_task(&g1_par_note_end_task);
2093     g1h->set_par_threads(0);
2094
2095     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2096            "sanity check");
2097   } else {
2098     g1_par_note_end_task.work(0);
2099   }
2100   g1h->check_gc_time_stamps();
2101
2102   if (!cleanup_list_is_empty()) {
2103     // The cleanup list is not empty, so we'll have to process it
2104     // concurrently. Notify anyone else that might be wanting free
2105     // regions that there will be more free regions coming soon.
2106     g1h->set_free_regions_coming();
2107   }
2108
2109   // The rem set scrubbing below has to happen before the record_concurrent_mark_cleanup_end()
2110   // call below, since it affects the metric by which we sort the heap regions.
2111   if (G1ScrubRemSets) {
2112     double rs_scrub_start = os::elapsedTime();
2113     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2114     if (G1CollectedHeap::use_parallel_gc_threads()) {
2115       g1h->set_par_threads((int)n_workers);
2116       g1h->workers()->run_task(&g1_par_scrub_rs_task);
2117       g1h->set_par_threads(0);
2118
2119       assert(g1h->check_heap_region_claim_values(
2120                                             HeapRegion::ScrubRemSetClaimValue),
2121              "sanity check");
2122     } else {
2123       g1_par_scrub_rs_task.work(0);
2124     }
2125
2126     double rs_scrub_end = os::elapsedTime();
2127     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2128     _total_rs_scrub_time += this_rs_scrub_time;
2129   }
2130
2131   // this will also free any regions totally full of garbage objects,
2132   // and sort the regions.
2133   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2134
2135   // Statistics.
2136   double end = os::elapsedTime();
2137   _cleanup_times.add((end - start) * 1000.0);
2138
2139   if (G1Log::fine()) {
2140     g1h->print_size_transition(gclog_or_tty,
2141                                start_used_bytes,
2142                                g1h->used(),
2143                                g1h->capacity());
2144   }
2145
2146   // Clean up will have freed any regions completely full of garbage.
2147   // Update the soft reference policy with the new heap occupancy.
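  // (update_heap_info_at_gc() refreshes the heap usage and capacity
  //  figures that the SoftReference clearing policy bases its
  //  decisions on.)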
2148   Universe::update_heap_info_at_gc();
2149
2150   if (VerifyDuringGC) {
2151     HandleMark hm;  // handle scope
2152     Universe::heap()->prepare_for_verify();
2153     Universe::verify(VerifyOption_G1UsePrevMarking,
2154                      " VerifyDuringGC:(after)");
2155   }
2156
2157   g1h->check_bitmaps("Cleanup End");
2158
2159   g1h->verify_region_sets_optional();
2160
2161   // We need to make this be a "collection" so any collection pause that
2162   // races with it goes around and waits for completeCleanup to finish.
2163   g1h->increment_total_collections();
2164
2165   // Clean out dead classes and update Metaspace sizes.
2166   ClassLoaderDataGraph::purge();
2167   MetaspaceGC::compute_new_size();
2168
2169   // We reclaimed old regions so we should calculate the sizes to make
2170   // sure we update the old gen/space data.
2171   g1h->g1mm()->update_sizes();
2172
2173   g1h->trace_heap_after_concurrent_cycle();
2174 }
2175
2176 void ConcurrentMark::completeCleanup() {
2177   if (has_aborted()) return;
2178
2179   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2180
2181   _cleanup_list.verify_optional();
2182   FreeRegionList tmp_free_list("Tmp Free List");
2183
2184   if (G1ConcRegionFreeingVerbose) {
2185     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2186                            "cleanup list has %u entries",
2187                            _cleanup_list.length());
2188   }
2189
2190   // No one else should be accessing the _cleanup_list at this point,
2191   // so it's not necessary to take any locks.
2192   while (!_cleanup_list.is_empty()) {
2193     HeapRegion* hr = _cleanup_list.remove_head();
2194     assert(hr != NULL, "Got NULL from a non-empty list");
2195     hr->par_clear();
2196     tmp_free_list.add_ordered(hr);
2197
2198     // Instead of adding one region at a time to the secondary_free_list,
2199     // we accumulate them in the local list and move them a few at a
2200     // time. This also cuts down on the number of notify_all() calls
2201     // we do during this process. We'll also append the local list when
2202     // _cleanup_list is empty (which means we just removed the last
2203     // region from the _cleanup_list).
2204     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2205         _cleanup_list.is_empty()) {
2206       if (G1ConcRegionFreeingVerbose) {
2207         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2208                                "appending %u entries to the secondary_free_list, "
2209                                "cleanup list still has %u entries",
2210                                tmp_free_list.length(),
2211                                _cleanup_list.length());
2212       }
2213
2214       {
2215         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2216         g1h->secondary_free_list_add(&tmp_free_list);
2217         SecondaryFreeList_lock->notify_all();
2218       }
2219
2220       if (G1StressConcRegionFreeing) {
2221         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2222           os::sleep(Thread::current(), (jlong) 1, false);
2223         }
2224       }
2225     }
2226   }
2227   assert(tmp_free_list.is_empty(), "post-condition");
2228 }
2229
2230 // Supporting Object and Oop closures for reference discovery
2231 // and processing during marking
2232
2233 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2234   HeapWord* addr = (HeapWord*)obj;
2235   return addr != NULL &&
2236          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2237 }
2238
2239 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2240 // Uses the CMTask associated with a worker thread (for serial reference
2241 // processing the CMTask for worker 0 is used) to preserve (mark) and
2242 // trace referent objects.
2243 // 2244 // Using the CMTask and embedded local queues avoids having the worker 2245 // threads operating on the global mark stack. This reduces the risk 2246 // of overflowing the stack - which we would rather avoid at this late 2247 // state. Also using the tasks' local queues removes the potential 2248 // of the workers interfering with each other that could occur if 2249 // operating on the global stack. 2250 2251 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2252 ConcurrentMark* _cm; 2253 CMTask* _task; 2254 int _ref_counter_limit; 2255 int _ref_counter; 2256 bool _is_serial; 2257 public: 2258 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2259 _cm(cm), _task(task), _is_serial(is_serial), 2260 _ref_counter_limit(G1RefProcDrainInterval) { 2261 assert(_ref_counter_limit > 0, "sanity"); 2262 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2263 _ref_counter = _ref_counter_limit; 2264 } 2265 2266 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2267 virtual void do_oop( oop* p) { do_oop_work(p); } 2268 2269 template <class T> void do_oop_work(T* p) { 2270 if (!_cm->has_overflown()) { 2271 oop obj = oopDesc::load_decode_heap_oop(p); 2272 if (_cm->verbose_high()) { 2273 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2274 "*"PTR_FORMAT" = "PTR_FORMAT, 2275 _task->worker_id(), p2i(p), p2i((void*) obj)); 2276 } 2277 2278 _task->deal_with_reference(obj); 2279 _ref_counter--; 2280 2281 if (_ref_counter == 0) { 2282 // We have dealt with _ref_counter_limit references, pushing them 2283 // and objects reachable from them on to the local stack (and 2284 // possibly the global stack). Call CMTask::do_marking_step() to 2285 // process these entries. 2286 // 2287 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2288 // there's nothing more to do (i.e. we're done with the entries that 2289 // were pushed as a result of the CMTask::deal_with_reference() calls 2290 // above) or we overflow. 2291 // 2292 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2293 // flag while there may still be some work to do. (See the comment at 2294 // the beginning of CMTask::do_marking_step() for those conditions - 2295 // one of which is reaching the specified time target.) It is only 2296 // when CMTask::do_marking_step() returns without setting the 2297 // has_aborted() flag that the marking step has completed. 2298 do { 2299 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2300 _task->do_marking_step(mark_step_duration_ms, 2301 false /* do_termination */, 2302 _is_serial); 2303 } while (_task->has_aborted() && !_cm->has_overflown()); 2304 _ref_counter = _ref_counter_limit; 2305 } 2306 } else { 2307 if (_cm->verbose_high()) { 2308 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2309 } 2310 } 2311 } 2312 }; 2313 2314 // 'Drain' oop closure used by both serial and parallel reference processing. 2315 // Uses the CMTask associated with a given worker thread (for serial 2316 // reference processing the CMtask for worker 0 is used). Calls the 2317 // do_marking_step routine, with an unbelievably large timeout value, 2318 // to drain the marking data structures of the remaining entries 2319 // added by the 'keep alive' oop closure above. 
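//
// The drain loop has the same shape in both closures, roughly:
//
//   do {
//     _task->do_marking_step(...);
//   } while (_task->has_aborted() && !_cm->has_overflown());
//
// i.e. re-invoke the step until it completes without aborting, or give
// up once the global mark stack has overflown.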
2320
2321 class G1CMDrainMarkingStackClosure: public VoidClosure {
2322   ConcurrentMark* _cm;
2323   CMTask*         _task;
2324   bool            _is_serial;
2325  public:
2326   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2327     _cm(cm), _task(task), _is_serial(is_serial) {
2328     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2329   }
2330
2331   void do_void() {
2332     do {
2333       if (_cm->verbose_high()) {
2334         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2335                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2336       }
2337
2338       // We call CMTask::do_marking_step() to completely drain the local
2339       // and global marking stacks of entries pushed by the 'keep alive'
2340       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2341       //
2342       // CMTask::do_marking_step() is called in a loop, which we'll exit
2343       // if there's nothing more to do (i.e. we've completely drained the
2344       // entries that were pushed as a result of applying the 'keep alive'
2345       // closure to the entries on the discovered ref lists) or we overflow
2346       // the global marking stack.
2347       //
2348       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2349       // flag while there may still be some work to do. (See the comment at
2350       // the beginning of CMTask::do_marking_step() for those conditions -
2351       // one of which is reaching the specified time target.) It is only
2352       // when CMTask::do_marking_step() returns without setting the
2353       // has_aborted() flag that the marking step has completed.
2354
2355       _task->do_marking_step(1000000000.0 /* something very large */,
2356                              true         /* do_termination */,
2357                              _is_serial);
2358     } while (_task->has_aborted() && !_cm->has_overflown());
2359   }
2360 };
2361
2362 // Implementation of AbstractRefProcTaskExecutor for parallel
2363 // reference processing at the end of G1 concurrent marking
2364
2365 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2366  private:
2367   G1CollectedHeap* _g1h;
2368   ConcurrentMark*  _cm;
2369   WorkGang*        _workers;
2370   int              _active_workers;
2371
2372  public:
2373   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2374                           ConcurrentMark* cm,
2375                           WorkGang* workers,
2376                           int n_workers) :
2377     _g1h(g1h), _cm(cm),
2378     _workers(workers), _active_workers(n_workers) { }
2379
2380   // Executes the given task using concurrent marking worker threads.
2381 virtual void execute(ProcessTask& task); 2382 virtual void execute(EnqueueTask& task); 2383 }; 2384 2385 class G1CMRefProcTaskProxy: public AbstractGangTask { 2386 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2387 ProcessTask& _proc_task; 2388 G1CollectedHeap* _g1h; 2389 ConcurrentMark* _cm; 2390 2391 public: 2392 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2393 G1CollectedHeap* g1h, 2394 ConcurrentMark* cm) : 2395 AbstractGangTask("Process reference objects in parallel"), 2396 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2397 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2398 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2399 } 2400 2401 virtual void work(uint worker_id) { 2402 CMTask* task = _cm->task(worker_id); 2403 G1CMIsAliveClosure g1_is_alive(_g1h); 2404 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2405 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2406 2407 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2408 } 2409 }; 2410 2411 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2412 assert(_workers != NULL, "Need parallel worker threads."); 2413 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2414 2415 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2416 2417 // We need to reset the concurrency level before each 2418 // proxy task execution, so that the termination protocol 2419 // and overflow handling in CMTask::do_marking_step() knows 2420 // how many workers to wait for. 2421 _cm->set_concurrency(_active_workers); 2422 _g1h->set_par_threads(_active_workers); 2423 _workers->run_task(&proc_task_proxy); 2424 _g1h->set_par_threads(0); 2425 } 2426 2427 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2428 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2429 EnqueueTask& _enq_task; 2430 2431 public: 2432 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2433 AbstractGangTask("Enqueue reference objects in parallel"), 2434 _enq_task(enq_task) { } 2435 2436 virtual void work(uint worker_id) { 2437 _enq_task.work(worker_id); 2438 } 2439 }; 2440 2441 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2442 assert(_workers != NULL, "Need parallel worker threads."); 2443 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2444 2445 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2446 2447 // Not strictly necessary but... 2448 // 2449 // We need to reset the concurrency level before each 2450 // proxy task execution, so that the termination protocol 2451 // and overflow handling in CMTask::do_marking_step() knows 2452 // how many workers to wait for. 2453 _cm->set_concurrency(_active_workers); 2454 _g1h->set_par_threads(_active_workers); 2455 _workers->run_task(&enq_task_proxy); 2456 _g1h->set_par_threads(0); 2457 } 2458 2459 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2460 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2461 } 2462 2463 // Helper class to get rid of some boilerplate code. 
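// It emits the leading space that the finer-level remark log lines
// expect and forwards the concurrent-mark timer and GC id to the
// GCTraceTime constructor.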
2464 class G1RemarkGCTraceTime : public GCTraceTime {
2465   static bool doit_and_prepend(bool doit) {
2466     if (doit) {
2467       gclog_or_tty->put(' ');
2468     }
2469     return doit;
2470   }
2471
2472  public:
2473   G1RemarkGCTraceTime(const char* title, bool doit)
2474     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
2475                   G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
2476   }
2477 };
2478
2479 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2480   if (has_overflown()) {
2481     // Skip processing the discovered references if we have
2482     // overflown the global marking stack. Reference objects
2483     // only get discovered once so it is OK to not
2484     // de-populate the discovered reference lists. We could have,
2485     // but the only benefit would be that, when marking restarts,
2486     // fewer reference objects are discovered.
2487     return;
2488   }
2489
2490   ResourceMark rm;
2491   HandleMark   hm;
2492
2493   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2494
2495   // Is alive closure.
2496   G1CMIsAliveClosure g1_is_alive(g1h);
2497
2498   // Inner scope to exclude the cleaning of the string and symbol
2499   // tables from the displayed time.
2500   {
2501     if (G1Log::finer()) {
2502       gclog_or_tty->put(' ');
2503     }
2504     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2505
2506     ReferenceProcessor* rp = g1h->ref_processor_cm();
2507
2508     // See the comment in G1CollectedHeap::ref_processing_init()
2509     // about how reference processing currently works in G1.
2510
2511     // Set the soft reference policy
2512     rp->setup_policy(clear_all_soft_refs);
2513     assert(_markStack.isEmpty(), "mark stack should be empty");
2514
2515     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2516     // in serial reference processing. Note these closures are also
2517     // used for serially processing (by the current thread) the
2518     // JNI references during parallel reference processing.
2519     //
2520     // These closures do not need to synchronize with the worker
2521     // threads involved in parallel reference processing as these
2522     // instances are executed serially by the current thread (e.g.
2523     // reference processing is not multi-threaded and is thus
2524     // performed by the current thread instead of a gang worker).
2525     //
2526     // The gang tasks involved in parallel reference processing create
2527     // their own instances of these closures, which do their own
2528     // synchronization among themselves.
2529     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2530     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2531
2532     // We need at least one active thread. If reference processing
2533     // is not multi-threaded we use the current (VMThread) thread,
2534     // otherwise we use the work gang from the G1CollectedHeap and
2535     // we utilize all the worker threads we can.
2536     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2537     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2538     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2539
2540     // Parallel processing task executor.
2541     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2542                                               g1h->workers(), active_workers);
2543     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2544
2545     // Set the concurrency level. The phase was already set prior to
2546     // executing the remark task.
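    // (set_concurrency() records how many workers the termination and
    //  overflow protocol in CMTask::do_marking_step() should wait for.)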
2547 set_concurrency(active_workers); 2548 2549 // Set the degree of MT processing here. If the discovery was done MT, 2550 // the number of threads involved during discovery could differ from 2551 // the number of active workers. This is OK as long as the discovered 2552 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2553 rp->set_active_mt_degree(active_workers); 2554 2555 // Process the weak references. 2556 const ReferenceProcessorStats& stats = 2557 rp->process_discovered_references(&g1_is_alive, 2558 &g1_keep_alive, 2559 &g1_drain_mark_stack, 2560 executor, 2561 g1h->gc_timer_cm(), 2562 concurrent_gc_id()); 2563 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2564 2565 // The do_oop work routines of the keep_alive and drain_marking_stack 2566 // oop closures will set the has_overflown flag if we overflow the 2567 // global marking stack. 2568 2569 assert(_markStack.overflow() || _markStack.isEmpty(), 2570 "mark stack should be empty (unless it overflowed)"); 2571 2572 if (_markStack.overflow()) { 2573 // This should have been done already when we tried to push an 2574 // entry on to the global mark stack. But let's do it again. 2575 set_has_overflown(); 2576 } 2577 2578 assert(rp->num_q() == active_workers, "why not"); 2579 2580 rp->enqueue_discovered_references(executor); 2581 2582 rp->verify_no_references_recorded(); 2583 assert(!rp->discovery_enabled(), "Post condition"); 2584 } 2585 2586 if (has_overflown()) { 2587 // We can not trust g1_is_alive if the marking stack overflowed 2588 return; 2589 } 2590 2591 assert(_markStack.isEmpty(), "Marking should have completed"); 2592 2593 // Unload Klasses, String, Symbols, Code Cache, etc. 2594 2595 G1RemarkGCTraceTime trace("Unloading", G1Log::finer()); 2596 2597 bool purged_classes; 2598 2599 { 2600 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2601 purged_classes = SystemDictionary::do_unloading(&g1_is_alive); 2602 } 2603 2604 { 2605 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest()); 2606 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2607 } 2608 2609 if (G1StringDedup::is_enabled()) { 2610 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2611 G1StringDedup::unlink(&g1_is_alive); 2612 } 2613 } 2614 2615 void ConcurrentMark::swapMarkBitMaps() { 2616 CMBitMapRO* temp = _prevMarkBitMap; 2617 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2618 _nextMarkBitMap = (CMBitMap*) temp; 2619 } 2620 2621 class CMObjectClosure; 2622 2623 // Closure for iterating over objects, currently only used for 2624 // processing SATB buffers. 
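// Each object drained from a SATB buffer is handed to
// CMTask::deal_with_reference(), which marks it and, when necessary,
// pushes it on the task's local queue for later scanning.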
2625 class CMObjectClosure : public ObjectClosure {
2626  private:
2627   CMTask* _task;
2628
2629  public:
2630   void do_object(oop obj) {
2631     _task->deal_with_reference(obj);
2632   }
2633
2634   CMObjectClosure(CMTask* task) : _task(task) { }
2635 };
2636
2637 class G1RemarkThreadsClosure : public ThreadClosure {
2638   CMObjectClosure _cm_obj;
2639   G1CMOopClosure _cm_cl;
2640   MarkingCodeBlobClosure _code_cl;
2641   int _thread_parity;
2642   bool _is_par;
2643
2644  public:
2645   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
2646     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2647     _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
2648
2649   void do_thread(Thread* thread) {
2650     if (thread->is_Java_thread()) {
2651       if (thread->claim_oops_do(_is_par, _thread_parity)) {
2652         JavaThread* jt = (JavaThread*)thread;
2653
2654         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking,
2655         // however the oops reachable from nmethods have very complex lifecycles:
2656         // * Alive if on the stack of an executing method
2657         // * Weakly reachable otherwise
2658         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2659         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2660         jt->nmethods_do(&_code_cl);
2661
2662         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2663       }
2664     } else if (thread->is_VM_thread()) {
2665       if (thread->claim_oops_do(_is_par, _thread_parity)) {
2666         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2667       }
2668     }
2669   }
2670 };
2671
2672 class CMRemarkTask: public AbstractGangTask {
2673  private:
2674   ConcurrentMark* _cm;
2675   bool            _is_serial;
2676  public:
2677   void work(uint worker_id) {
2678     // Since all available tasks are actually started, we should
2679     // only proceed if we're supposed to be active.
2680     if (worker_id < _cm->active_tasks()) {
2681       CMTask* task = _cm->task(worker_id);
2682       task->record_start_time();
2683       {
2684         ResourceMark rm;
2685         HandleMark hm;
2686
2687         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
2688         Threads::threads_do(&threads_f);
2689       }
2690
2691       do {
2692         task->do_marking_step(1000000000.0 /* something very large */,
2693                               true         /* do_termination       */,
2694                               _is_serial);
2695       } while (task->has_aborted() && !_cm->has_overflown());
2696       // If we overflow, then we do not want to restart. We instead
2697       // want to abort remark and do concurrent marking again.
2698       task->record_end_time();
2699     }
2700   }
2701
2702   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2703     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2704     _cm->terminator()->reset_for_reuse(active_workers);
2705   }
2706 };
2707
2708 void ConcurrentMark::checkpointRootsFinalWork() {
2709   ResourceMark rm;
2710   HandleMark   hm;
2711   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2712
2713   G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
2714
2715   g1h->ensure_parsability(false);
2716
2717   if (G1CollectedHeap::use_parallel_gc_threads()) {
2718     G1CollectedHeap::StrongRootsScope srs(g1h);
2719     // this is remark, so we'll use up all active threads
2720     uint active_workers = g1h->workers()->active_workers();
2721     if (active_workers == 0) {
2722       assert(active_workers > 0, "Should have been set earlier");
2723       active_workers = (uint) ParallelGCThreads;
2724       g1h->workers()->set_active_workers(active_workers);
2725     }
2726     set_concurrency_and_phase(active_workers, false /* concurrent */);
2727     // Leave _parallel_marking_threads at its
2728     // value originally calculated in the ConcurrentMark
2729     // constructor and pass values of the active workers
2730     // through the gang in the task.
2731
2732     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2733     // We will start all available threads, even if we decide that the
2734     // active_workers will be fewer. The extra ones will just bail out
2735     // immediately.
2736     g1h->set_par_threads(active_workers);
2737     g1h->workers()->run_task(&remarkTask);
2738     g1h->set_par_threads(0);
2739   } else {
2740     G1CollectedHeap::StrongRootsScope srs(g1h);
2741     uint active_workers = 1;
2742     set_concurrency_and_phase(active_workers, false /* concurrent */);
2743
2744     // Note - if there's no work gang then the VMThread will be
2745     // the thread to execute the remark - serially. We have
2746     // to pass true for the is_serial parameter so that
2747     // CMTask::do_marking_step() doesn't enter the sync
2748     // barriers in the event of an overflow. Doing so will
2749     // cause an assert that the current thread is not a
2750     // concurrent GC thread.
2751 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2752 remarkTask.work(0); 2753 } 2754 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2755 guarantee(has_overflown() || 2756 satb_mq_set.completed_buffers_num() == 0, 2757 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2758 BOOL_TO_STR(has_overflown()), 2759 satb_mq_set.completed_buffers_num())); 2760 2761 print_stats(); 2762 } 2763 2764 #ifndef PRODUCT 2765 2766 class PrintReachableOopClosure: public OopClosure { 2767 private: 2768 G1CollectedHeap* _g1h; 2769 outputStream* _out; 2770 VerifyOption _vo; 2771 bool _all; 2772 2773 public: 2774 PrintReachableOopClosure(outputStream* out, 2775 VerifyOption vo, 2776 bool all) : 2777 _g1h(G1CollectedHeap::heap()), 2778 _out(out), _vo(vo), _all(all) { } 2779 2780 void do_oop(narrowOop* p) { do_oop_work(p); } 2781 void do_oop( oop* p) { do_oop_work(p); } 2782 2783 template <class T> void do_oop_work(T* p) { 2784 oop obj = oopDesc::load_decode_heap_oop(p); 2785 const char* str = NULL; 2786 const char* str2 = ""; 2787 2788 if (obj == NULL) { 2789 str = ""; 2790 } else if (!_g1h->is_in_g1_reserved(obj)) { 2791 str = " O"; 2792 } else { 2793 HeapRegion* hr = _g1h->heap_region_containing(obj); 2794 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2795 bool marked = _g1h->is_marked(obj, _vo); 2796 2797 if (over_tams) { 2798 str = " >"; 2799 if (marked) { 2800 str2 = " AND MARKED"; 2801 } 2802 } else if (marked) { 2803 str = " M"; 2804 } else { 2805 str = " NOT"; 2806 } 2807 } 2808 2809 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2810 p2i(p), p2i((void*) obj), str, str2); 2811 } 2812 }; 2813 2814 class PrintReachableObjectClosure : public ObjectClosure { 2815 private: 2816 G1CollectedHeap* _g1h; 2817 outputStream* _out; 2818 VerifyOption _vo; 2819 bool _all; 2820 HeapRegion* _hr; 2821 2822 public: 2823 PrintReachableObjectClosure(outputStream* out, 2824 VerifyOption vo, 2825 bool all, 2826 HeapRegion* hr) : 2827 _g1h(G1CollectedHeap::heap()), 2828 _out(out), _vo(vo), _all(all), _hr(hr) { } 2829 2830 void do_object(oop o) { 2831 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2832 bool marked = _g1h->is_marked(o, _vo); 2833 bool print_it = _all || over_tams || marked; 2834 2835 if (print_it) { 2836 _out->print_cr(" "PTR_FORMAT"%s", 2837 p2i((void *)o), (over_tams) ? " >" : (marked) ? 
" M" : ""); 2838 PrintReachableOopClosure oopCl(_out, _vo, _all); 2839 o->oop_iterate_no_header(&oopCl); 2840 } 2841 } 2842 }; 2843 2844 class PrintReachableRegionClosure : public HeapRegionClosure { 2845 private: 2846 G1CollectedHeap* _g1h; 2847 outputStream* _out; 2848 VerifyOption _vo; 2849 bool _all; 2850 2851 public: 2852 bool doHeapRegion(HeapRegion* hr) { 2853 HeapWord* b = hr->bottom(); 2854 HeapWord* e = hr->end(); 2855 HeapWord* t = hr->top(); 2856 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2857 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2858 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p)); 2859 _out->cr(); 2860 2861 HeapWord* from = b; 2862 HeapWord* to = t; 2863 2864 if (to > from) { 2865 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to)); 2866 _out->cr(); 2867 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2868 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2869 _out->cr(); 2870 } 2871 2872 return false; 2873 } 2874 2875 PrintReachableRegionClosure(outputStream* out, 2876 VerifyOption vo, 2877 bool all) : 2878 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2879 }; 2880 2881 void ConcurrentMark::print_reachable(const char* str, 2882 VerifyOption vo, 2883 bool all) { 2884 gclog_or_tty->cr(); 2885 gclog_or_tty->print_cr("== Doing heap dump... "); 2886 2887 if (G1PrintReachableBaseFile == NULL) { 2888 gclog_or_tty->print_cr(" #### error: no base file defined"); 2889 return; 2890 } 2891 2892 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2893 (JVM_MAXPATHLEN - 1)) { 2894 gclog_or_tty->print_cr(" #### error: file name too long"); 2895 return; 2896 } 2897 2898 char file_name[JVM_MAXPATHLEN]; 2899 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2900 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2901 2902 fileStream fout(file_name); 2903 if (!fout.is_open()) { 2904 gclog_or_tty->print_cr(" #### error: could not open file"); 2905 return; 2906 } 2907 2908 outputStream* out = &fout; 2909 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2910 out->cr(); 2911 2912 out->print_cr("--- ITERATING OVER REGIONS"); 2913 out->cr(); 2914 PrintReachableRegionClosure rcl(out, vo, all); 2915 _g1h->heap_region_iterate(&rcl); 2916 out->cr(); 2917 2918 gclog_or_tty->print_cr(" done"); 2919 gclog_or_tty->flush(); 2920 } 2921 2922 #endif // PRODUCT 2923 2924 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2925 // Note we are overriding the read-only view of the prev map here, via 2926 // the cast. 2927 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2928 } 2929 2930 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2931 _nextMarkBitMap->clearRange(mr); 2932 } 2933 2934 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2935 clearRangePrevBitmap(mr); 2936 clearRangeNextBitmap(mr); 2937 } 2938 2939 HeapRegion* 2940 ConcurrentMark::claim_region(uint worker_id) { 2941 // "checkpoint" the finger 2942 HeapWord* finger = _finger; 2943 2944 // _heap_end will not change underneath our feet; it only changes at 2945 // yield points. 2946 while (finger < _heap_end) { 2947 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2948 2949 // Note on how this code handles humongous regions. In the 2950 // normal case the finger will reach the start of a "starts 2951 // humongous" (SH) region. 
Its end will either be the end of the
2952     // last "continues humongous" (CH) region in the sequence, or the
2953     // standard end of the SH region (if the SH is the only region in
2954     // the sequence). That way claim_region() will skip over the CH
2955     // regions. However, there is a subtle race between a CM thread
2956     // executing this method and a mutator thread doing a humongous
2957     // object allocation. The two are not mutually exclusive as the CM
2958     // thread does not need to hold the Heap_lock when it gets
2959     // here. So there is a chance that claim_region() will come across
2960     // a free region that's in the process of becoming a SH or a CH
2961     // region. In the former case, it will either
2962     //   a) Miss the update to the region's end, in which case it will
2963     //      visit every subsequent CH region, will find their bitmaps
2964     //      empty, and do nothing, or
2965     //   b) Will observe the update of the region's end (in which case
2966     //      it will skip the subsequent CH regions).
2967     // If it comes across a region that suddenly becomes CH, the
2968     // scenario will be similar to b). So, the race between
2969     // claim_region() and a humongous object allocation might force us
2970     // to do a bit of unnecessary work (due to some unnecessary bitmap
2971     // iterations) but it should not introduce any correctness issues.
2972     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2973     HeapWord*   bottom        = curr_region->bottom();
2974     HeapWord*   end           = curr_region->end();
2975     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2976
2977     if (verbose_low()) {
2978       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2979                              "["PTR_FORMAT", "PTR_FORMAT"), "
2980                              "limit = "PTR_FORMAT,
2981                              worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2982     }
2983
2984     // Is the gap between reading the finger and doing the CAS too long?
2985     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2986     if (res == finger) {
2987       // we succeeded
2988
2989       // notice that _finger == end cannot be guaranteed here since
2990       // someone else might have moved the finger even further
2991       assert(_finger >= end, "the finger should have moved forward");
2992
2993       if (verbose_low()) {
2994         gclog_or_tty->print_cr("[%u] we were successful with region = "
2995                                PTR_FORMAT, worker_id, p2i(curr_region));
2996       }
2997
2998       if (limit > bottom) {
2999         if (verbose_low()) {
3000           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
3001                                  "returning it ", worker_id, p2i(curr_region));
3002         }
3003         return curr_region;
3004       } else {
3005         assert(limit == bottom,
3006                "the region limit should be at bottom");
3007         if (verbose_low()) {
3008           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
3009                                  "returning NULL", worker_id, p2i(curr_region));
3010         }
3011         // we return NULL and the caller should try calling
3012         // claim_region() again.
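        // (The successful CAS above already advanced the global finger
        //  past this empty region, so the retry will look at the next
        //  region.)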
3013 return NULL; 3014 } 3015 } else { 3016 assert(_finger > finger, "the finger should have moved forward"); 3017 if (verbose_low()) { 3018 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 3019 "global finger = "PTR_FORMAT", " 3020 "our finger = "PTR_FORMAT, 3021 worker_id, p2i(_finger), p2i(finger)); 3022 } 3023 3024 // read it again 3025 finger = _finger; 3026 } 3027 } 3028 3029 return NULL; 3030 } 3031 3032 #ifndef PRODUCT 3033 enum VerifyNoCSetOopsPhase { 3034 VerifyNoCSetOopsStack, 3035 VerifyNoCSetOopsQueues, 3036 VerifyNoCSetOopsSATBCompleted, 3037 VerifyNoCSetOopsSATBThread 3038 }; 3039 3040 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 3041 private: 3042 G1CollectedHeap* _g1h; 3043 VerifyNoCSetOopsPhase _phase; 3044 int _info; 3045 3046 const char* phase_str() { 3047 switch (_phase) { 3048 case VerifyNoCSetOopsStack: return "Stack"; 3049 case VerifyNoCSetOopsQueues: return "Queue"; 3050 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 3051 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 3052 default: ShouldNotReachHere(); 3053 } 3054 return NULL; 3055 } 3056 3057 void do_object_work(oop obj) { 3058 guarantee(!_g1h->obj_in_cs(obj), 3059 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 3060 p2i((void*) obj), phase_str(), _info)); 3061 } 3062 3063 public: 3064 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 3065 3066 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 3067 _phase = phase; 3068 _info = info; 3069 } 3070 3071 virtual void do_oop(oop* p) { 3072 oop obj = oopDesc::load_decode_heap_oop(p); 3073 do_object_work(obj); 3074 } 3075 3076 virtual void do_oop(narrowOop* p) { 3077 // We should not come across narrow oops while scanning marking 3078 // stacks and SATB buffers. 3079 ShouldNotReachHere(); 3080 } 3081 3082 virtual void do_object(oop obj) { 3083 do_object_work(obj); 3084 } 3085 }; 3086 3087 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 3088 bool verify_enqueued_buffers, 3089 bool verify_thread_buffers, 3090 bool verify_fingers) { 3091 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 3092 if (!G1CollectedHeap::heap()->mark_in_progress()) { 3093 return; 3094 } 3095 3096 VerifyNoCSetOopsClosure cl; 3097 3098 if (verify_stacks) { 3099 // Verify entries on the global mark stack 3100 cl.set_phase(VerifyNoCSetOopsStack); 3101 _markStack.oops_do(&cl); 3102 3103 // Verify entries on the task queues 3104 for (uint i = 0; i < _max_worker_id; i += 1) { 3105 cl.set_phase(VerifyNoCSetOopsQueues, i); 3106 CMTaskQueue* queue = _task_queues->queue(i); 3107 queue->oops_do(&cl); 3108 } 3109 } 3110 3111 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 3112 3113 // Verify entries on the enqueued SATB buffers 3114 if (verify_enqueued_buffers) { 3115 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 3116 satb_qs.iterate_completed_buffers_read_only(&cl); 3117 } 3118 3119 // Verify entries on the per-thread SATB buffers 3120 if (verify_thread_buffers) { 3121 cl.set_phase(VerifyNoCSetOopsSATBThread); 3122 satb_qs.iterate_thread_buffers_read_only(&cl); 3123 } 3124 3125 if (verify_fingers) { 3126 // Verify the global finger 3127 HeapWord* global_finger = finger(); 3128 if (global_finger != NULL && global_finger < _heap_end) { 3129 // The global finger always points to a heap region boundary. 
We 3130 // use heap_region_containing_raw() to get the containing region 3131 // given that the global finger could be pointing to a free region 3132 // which subsequently becomes continues humongous. If that 3133 // happens, heap_region_containing() will return the bottom of the 3134 // corresponding starts humongous region and the check below will 3135 // not hold any more. 3136 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3137 guarantee(global_finger == global_hr->bottom(), 3138 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3139 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 3140 } 3141 3142 // Verify the task fingers 3143 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3144 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3145 CMTask* task = _tasks[i]; 3146 HeapWord* task_finger = task->finger(); 3147 if (task_finger != NULL && task_finger < _heap_end) { 3148 // See above note on the global finger verification. 3149 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3150 guarantee(task_finger == task_hr->bottom() || 3151 !task_hr->in_collection_set(), 3152 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3153 p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); 3154 } 3155 } 3156 } 3157 } 3158 #endif // PRODUCT 3159 3160 // Aggregate the counting data that was constructed concurrently 3161 // with marking. 3162 class AggregateCountDataHRClosure: public HeapRegionClosure { 3163 G1CollectedHeap* _g1h; 3164 ConcurrentMark* _cm; 3165 CardTableModRefBS* _ct_bs; 3166 BitMap* _cm_card_bm; 3167 uint _max_worker_id; 3168 3169 public: 3170 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3171 BitMap* cm_card_bm, 3172 uint max_worker_id) : 3173 _g1h(g1h), _cm(g1h->concurrent_mark()), 3174 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3175 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3176 3177 bool doHeapRegion(HeapRegion* hr) { 3178 if (hr->continuesHumongous()) { 3179 // We will ignore these here and process them when their 3180 // associated "starts humongous" region is processed. 3181 // Note that we cannot rely on their associated 3182 // "starts humongous" region to have their bit set to 1 3183 // since, due to the region chunking in the parallel region 3184 // iteration, a "continues humongous" region might be visited 3185 // before its associated "starts humongous". 3186 return false; 3187 } 3188 3189 HeapWord* start = hr->bottom(); 3190 HeapWord* limit = hr->next_top_at_mark_start(); 3191 HeapWord* end = hr->end(); 3192 3193 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3194 err_msg("Preconditions not met - " 3195 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3196 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3197 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()))); 3198 3199 assert(hr->next_marked_bytes() == 0, "Precondition"); 3200 3201 if (start == limit) { 3202 // NTAMS of this region has not been set so nothing to do. 3203 return false; 3204 } 3205 3206 // 'start' should be in the heap. 
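  // (Region bottoms are card aligned - the heap region size is a
  //  multiple of the card size - so both conjuncts of the assert
  //  below are expected to hold for every region.)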
3207     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3208     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3209     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3210
3211     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3212     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3213     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3214
3215     // If ntams is not card aligned then we bump the card bitmap index
3216     // for limit so that we get all the cards spanned by
3217     // the object ending at ntams.
3218     // Note: if this is the last region in the heap then ntams
3219     // could actually be just beyond the end of the heap;
3220     // limit_idx will then correspond to a (non-existent) card
3221     // that is also outside the heap.
3222     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3223       limit_idx += 1;
3224     }
3225
3226     assert(limit_idx <= end_idx, "or else use atomics");
3227
3228     // Aggregate the "stripe" in the count data associated with hr.
3229     uint hrs_index = hr->hrs_index();
3230     size_t marked_bytes = 0;
3231
3232     for (uint i = 0; i < _max_worker_id; i += 1) {
3233       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3234       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3235
3236       // Fetch the marked_bytes in this region for task i and
3237       // add it to the running total for this region.
3238       marked_bytes += marked_bytes_array[hrs_index];
3239
3240       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3241       // into the global card bitmap.
3242       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3243
3244       while (scan_idx < limit_idx) {
3245         assert(task_card_bm->at(scan_idx) == true, "should be");
3246         _cm_card_bm->set_bit(scan_idx);
3247         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3248
3249         // BitMap::get_next_one_offset() can handle the case when
3250         // its left_offset parameter is greater than its right_offset
3251         // parameter. It does, however, have an early exit if
3252         // left_offset == right_offset. So let's limit the value
3253         // passed in for the left offset here.
3254         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3255         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3256       }
3257     }
3258
3259     // Update the marked bytes for this region.
3260 hr->add_to_marked_bytes(marked_bytes); 3261 3262 // Next heap region 3263 return false; 3264 } 3265 }; 3266 3267 class G1AggregateCountDataTask: public AbstractGangTask { 3268 protected: 3269 G1CollectedHeap* _g1h; 3270 ConcurrentMark* _cm; 3271 BitMap* _cm_card_bm; 3272 uint _max_worker_id; 3273 int _active_workers; 3274 3275 public: 3276 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3277 ConcurrentMark* cm, 3278 BitMap* cm_card_bm, 3279 uint max_worker_id, 3280 int n_workers) : 3281 AbstractGangTask("Count Aggregation"), 3282 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3283 _max_worker_id(max_worker_id), 3284 _active_workers(n_workers) { } 3285 3286 void work(uint worker_id) { 3287 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3288 3289 if (G1CollectedHeap::use_parallel_gc_threads()) { 3290 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3291 _active_workers, 3292 HeapRegion::AggregateCountClaimValue); 3293 } else { 3294 _g1h->heap_region_iterate(&cl); 3295 } 3296 } 3297 }; 3298 3299 3300 void ConcurrentMark::aggregate_count_data() { 3301 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3302 _g1h->workers()->active_workers() : 3303 1); 3304 3305 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3306 _max_worker_id, n_workers); 3307 3308 if (G1CollectedHeap::use_parallel_gc_threads()) { 3309 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3310 "sanity check"); 3311 _g1h->set_par_threads(n_workers); 3312 _g1h->workers()->run_task(&g1_par_agg_task); 3313 _g1h->set_par_threads(0); 3314 3315 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3316 "sanity check"); 3317 _g1h->reset_heap_region_claim_values(); 3318 } else { 3319 g1_par_agg_task.work(0); 3320 } 3321 } 3322 3323 // Clear the per-worker arrays used to store the per-region counting data 3324 void ConcurrentMark::clear_all_count_data() { 3325 // Clear the global card bitmap - it will be filled during 3326 // liveness count aggregation (during remark) and the 3327 // final counting task. 3328 _card_bm.clear(); 3329 3330 // Clear the global region bitmap - it will be filled as part 3331 // of the final counting task. 3332 _region_bm.clear(); 3333 3334 uint max_regions = _g1h->max_regions(); 3335 assert(_max_worker_id > 0, "uninitialized"); 3336 3337 for (uint i = 0; i < _max_worker_id; i += 1) { 3338 BitMap* task_card_bm = count_card_bitmap_for(i); 3339 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3340 3341 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3342 assert(marked_bytes_array != NULL, "uninitialized"); 3343 3344 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3345 task_card_bm->clear(); 3346 } 3347 } 3348 3349 void ConcurrentMark::print_stats() { 3350 if (verbose_stats()) { 3351 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3352 for (size_t i = 0; i < _active_tasks; ++i) { 3353 _tasks[i]->print_stats(); 3354 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3355 } 3356 } 3357 } 3358 3359 // abandon current marking iteration due to a Full GC 3360 void ConcurrentMark::abort() { 3361 // Clear all marks to force marking thread to do nothing 3362 _nextMarkBitMap->clearAll(); 3363 3364 // Note we cannot clear the previous marking bitmap here 3365 // since VerifyDuringGC verifies the objects marked during 3366 // a full GC against the previous bitmap. 
3367 3368 // Clear the liveness counting data 3369 clear_all_count_data(); 3370 // Empty mark stack 3371 reset_marking_state(); 3372 for (uint i = 0; i < _max_worker_id; ++i) { 3373 _tasks[i]->clear_region_fields(); 3374 } 3375 _first_overflow_barrier_sync.abort(); 3376 _second_overflow_barrier_sync.abort(); 3377 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id(); 3378 if (!gc_id.is_undefined()) { 3379 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance 3380 // to detect that it was aborted. Only keep track of the first GC id that we aborted. 3381 _aborted_gc_id = gc_id; 3382 } 3383 _has_aborted = true; 3384 3385 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3386 satb_mq_set.abandon_partial_marking(); 3387 // This can be called either during or outside marking, we'll read 3388 // the expected_active value from the SATB queue set. 3389 satb_mq_set.set_active_all_threads( 3390 false, /* new active value */ 3391 satb_mq_set.is_active() /* expected_active */); 3392 3393 _g1h->trace_heap_after_concurrent_cycle(); 3394 _g1h->register_concurrent_cycle_end(); 3395 } 3396 3397 const GCId& ConcurrentMark::concurrent_gc_id() { 3398 if (has_aborted()) { 3399 return _aborted_gc_id; 3400 } 3401 return _g1h->gc_tracer_cm()->gc_id(); 3402 } 3403 3404 static void print_ms_time_info(const char* prefix, const char* name, 3405 NumberSeq& ns) { 3406 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3407 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3408 if (ns.num() > 0) { 3409 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3410 prefix, ns.sd(), ns.maximum()); 3411 } 3412 } 3413 3414 void ConcurrentMark::print_summary_info() { 3415 gclog_or_tty->print_cr(" Concurrent marking:"); 3416 print_ms_time_info(" ", "init marks", _init_times); 3417 print_ms_time_info(" ", "remarks", _remark_times); 3418 { 3419 print_ms_time_info(" ", "final marks", _remark_mark_times); 3420 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3421 3422 } 3423 print_ms_time_info(" ", "cleanups", _cleanup_times); 3424 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3425 _total_counting_time, 3426 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3427 (double)_cleanup_times.num() 3428 : 0.0)); 3429 if (G1ScrubRemSets) { 3430 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3431 _total_rs_scrub_time, 3432 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3433 (double)_cleanup_times.num() 3434 : 0.0)); 3435 } 3436 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3437 (_init_times.sum() + _remark_times.sum() + 3438 _cleanup_times.sum())/1000.0); 3439 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3440 "(%8.2f s marking).", 3441 cmThread()->vtime_accum(), 3442 cmThread()->vtime_mark_accum()); 3443 } 3444 3445 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3446 if (use_parallel_marking_threads()) { 3447 _parallel_workers->print_worker_threads_on(st); 3448 } 3449 } 3450 3451 void ConcurrentMark::print_on_error(outputStream* st) const { 3452 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3453 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3454 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3455 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3456 } 3457 3458 // We take a break if someone is trying to stop the world. 
3459 bool ConcurrentMark::do_yield_check(uint worker_id) { 3460 if (SuspendibleThreadSet::should_yield()) { 3461 if (worker_id == 0) { 3462 _g1h->g1_policy()->record_concurrent_pause(); 3463 } 3464 SuspendibleThreadSet::yield(); 3465 return true; 3466 } else { 3467 return false; 3468 } 3469 } 3470 3471 bool ConcurrentMark::containing_card_is_marked(void* p) { 3472 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3473 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3474 } 3475 3476 bool ConcurrentMark::containing_cards_are_marked(void* start, 3477 void* last) { 3478 return containing_card_is_marked(start) && 3479 containing_card_is_marked(last); 3480 } 3481 3482 #ifndef PRODUCT 3483 // for debugging purposes 3484 void ConcurrentMark::print_finger() { 3485 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3486 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3487 for (uint i = 0; i < _max_worker_id; ++i) { 3488 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3489 } 3490 gclog_or_tty->cr(); 3491 } 3492 #endif 3493 3494 void CMTask::scan_object(oop obj) { 3495 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3496 3497 if (_cm->verbose_high()) { 3498 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3499 _worker_id, p2i((void*) obj)); 3500 } 3501 3502 size_t obj_size = obj->size(); 3503 _words_scanned += obj_size; 3504 3505 obj->oop_iterate(_cm_oop_closure); 3506 statsOnly( ++_objs_scanned ); 3507 check_limits(); 3508 } 3509 3510 // Closure for iteration over bitmaps 3511 class CMBitMapClosure : public BitMapClosure { 3512 private: 3513 // the bitmap that is being iterated over 3514 CMBitMap* _nextMarkBitMap; 3515 ConcurrentMark* _cm; 3516 CMTask* _task; 3517 3518 public: 3519 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3520 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3521 3522 bool do_bit(size_t offset) { 3523 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3524 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3525 assert( addr < _cm->finger(), "invariant"); 3526 3527 statsOnly( _task->increase_objs_found_on_bitmap() ); 3528 assert(addr >= _task->finger(), "invariant"); 3529 3530 // We move that task's local finger along. 
3531 _task->move_finger_to(addr); 3532 3533 _task->scan_object(oop(addr)); 3534 // we only partially drain the local queue and global stack 3535 _task->drain_local_queue(true); 3536 _task->drain_global_stack(true); 3537 3538 // if the has_aborted flag has been raised, we need to bail out of 3539 // the iteration 3540 return !_task->has_aborted(); 3541 } 3542 }; 3543 3544 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3545 ConcurrentMark* cm, 3546 CMTask* task) 3547 : _g1h(g1h), _cm(cm), _task(task) { 3548 assert(_ref_processor == NULL, "should be initialized to NULL"); 3549 3550 if (G1UseConcMarkReferenceProcessing) { 3551 _ref_processor = g1h->ref_processor_cm(); 3552 assert(_ref_processor != NULL, "should not be NULL"); 3553 } 3554 } 3555 3556 void CMTask::setup_for_region(HeapRegion* hr) { 3557 assert(hr != NULL, 3558 "claim_region() should have filtered out NULL regions"); 3559 assert(!hr->continuesHumongous(), 3560 "claim_region() should have filtered out continues humongous regions"); 3561 3562 if (_cm->verbose_low()) { 3563 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT, 3564 _worker_id, p2i(hr)); 3565 } 3566 3567 _curr_region = hr; 3568 _finger = hr->bottom(); 3569 update_region_limit(); 3570 } 3571 3572 void CMTask::update_region_limit() { 3573 HeapRegion* hr = _curr_region; 3574 HeapWord* bottom = hr->bottom(); 3575 HeapWord* limit = hr->next_top_at_mark_start(); 3576 3577 if (limit == bottom) { 3578 if (_cm->verbose_low()) { 3579 gclog_or_tty->print_cr("[%u] found an empty region " 3580 "["PTR_FORMAT", "PTR_FORMAT")", 3581 _worker_id, p2i(bottom), p2i(limit)); 3582 } 3583 // The region was collected underneath our feet. 3584 // We set the finger to bottom to ensure that the bitmap 3585 // iteration that will follow this will not do anything. 3586 // (this is not a condition that holds when we set the region up, 3587 // as the region is not supposed to be empty in the first place) 3588 _finger = bottom; 3589 } else if (limit >= _region_limit) { 3590 assert(limit >= _finger, "peace of mind"); 3591 } else { 3592 assert(limit < _region_limit, "only way to get here"); 3593 // This can happen under some pretty unusual circumstances. An 3594 // evacuation pause empties the region underneath our feet (NTAMS 3595 // at bottom). We then do some allocation in the region (NTAMS 3596 // stays at bottom), followed by the region being used as a GC 3597 // alloc region (NTAMS will move to top() and the objects 3598 // originally below it will be grayed). All objects now marked in 3599 // the region are explicitly grayed, if below the global finger, 3600 // and we do not need in fact to scan anything else. So, we simply 3601 // set _finger to be limit to ensure that the bitmap iteration 3602 // doesn't do anything. 3603 _finger = limit; 3604 } 3605 3606 _region_limit = limit; 3607 } 3608 3609 void CMTask::giveup_current_region() { 3610 assert(_curr_region != NULL, "invariant"); 3611 if (_cm->verbose_low()) { 3612 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3613 _worker_id, p2i(_curr_region)); 3614 } 3615 clear_region_fields(); 3616 } 3617 3618 void CMTask::clear_region_fields() { 3619 // Values for these three fields that indicate that we're not 3620 // holding on to a region. 
3621 _curr_region = NULL; 3622 _finger = NULL; 3623 _region_limit = NULL; 3624 } 3625 3626 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3627 if (cm_oop_closure == NULL) { 3628 assert(_cm_oop_closure != NULL, "invariant"); 3629 } else { 3630 assert(_cm_oop_closure == NULL, "invariant"); 3631 } 3632 _cm_oop_closure = cm_oop_closure; 3633 } 3634 3635 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3636 guarantee(nextMarkBitMap != NULL, "invariant"); 3637 3638 if (_cm->verbose_low()) { 3639 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3640 } 3641 3642 _nextMarkBitMap = nextMarkBitMap; 3643 clear_region_fields(); 3644 3645 _calls = 0; 3646 _elapsed_time_ms = 0.0; 3647 _termination_time_ms = 0.0; 3648 _termination_start_time_ms = 0.0; 3649 3650 #if _MARKING_STATS_ 3651 _local_pushes = 0; 3652 _local_pops = 0; 3653 _local_max_size = 0; 3654 _objs_scanned = 0; 3655 _global_pushes = 0; 3656 _global_pops = 0; 3657 _global_max_size = 0; 3658 _global_transfers_to = 0; 3659 _global_transfers_from = 0; 3660 _regions_claimed = 0; 3661 _objs_found_on_bitmap = 0; 3662 _satb_buffers_processed = 0; 3663 _steal_attempts = 0; 3664 _steals = 0; 3665 _aborted = 0; 3666 _aborted_overflow = 0; 3667 _aborted_cm_aborted = 0; 3668 _aborted_yield = 0; 3669 _aborted_timed_out = 0; 3670 _aborted_satb = 0; 3671 _aborted_termination = 0; 3672 #endif // _MARKING_STATS_ 3673 } 3674 3675 bool CMTask::should_exit_termination() { 3676 regular_clock_call(); 3677 // This is called when we are in the termination protocol. We should 3678 // quit if, for some reason, this task wants to abort or the global 3679 // stack is not empty (this means that we can get work from it). 3680 return !_cm->mark_stack_empty() || has_aborted(); 3681 } 3682 3683 void CMTask::reached_limit() { 3684 assert(_words_scanned >= _words_scanned_limit || 3685 _refs_reached >= _refs_reached_limit , 3686 "shouldn't have been called otherwise"); 3687 regular_clock_call(); 3688 } 3689 3690 void CMTask::regular_clock_call() { 3691 if (has_aborted()) return; 3692 3693 // First, we need to recalculate the words scanned and refs reached 3694 // limits for the next clock call. 3695 recalculate_limits(); 3696 3697 // During the regular clock call we do the following 3698 3699 // (1) If an overflow has been flagged, then we abort. 3700 if (_cm->has_overflown()) { 3701 set_has_aborted(); 3702 return; 3703 } 3704 3705 // If we are not concurrent (i.e. we're doing remark) we don't need 3706 // to check anything else. The other steps are only needed during 3707 // the concurrent marking phase. 3708 if (!concurrent()) return; 3709 3710 // (2) If marking has been aborted for Full GC, then we also abort. 3711 if (_cm->has_aborted()) { 3712 set_has_aborted(); 3713 statsOnly( ++_aborted_cm_aborted ); 3714 return; 3715 } 3716 3717 double curr_time_ms = os::elapsedVTime() * 1000.0; 3718 3719 // (3) If marking stats are enabled, then we update the step history. 
3720 #if _MARKING_STATS_
3721   if (_words_scanned >= _words_scanned_limit) {
3722     ++_clock_due_to_scanning;
3723   }
3724   if (_refs_reached >= _refs_reached_limit) {
3725     ++_clock_due_to_marking;
3726   }
3727
3728   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3729   _interval_start_time_ms = curr_time_ms;
3730   _all_clock_intervals_ms.add(last_interval_ms);
3731
3732   if (_cm->verbose_medium()) {
3733     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3734                            "scanned = %d%s, refs reached = %d%s",
3735                            _worker_id, last_interval_ms,
3736                            _words_scanned,
3737                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3738                            _refs_reached,
3739                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3740   }
3741 #endif // _MARKING_STATS_
3742
3743   // (4) We check whether we should yield. If we have to, then we abort.
3744   if (SuspendibleThreadSet::should_yield()) {
3745     // We should yield. To do this we abort the task. The caller is
3746     // responsible for yielding.
3747     set_has_aborted();
3748     statsOnly( ++_aborted_yield );
3749     return;
3750   }
3751
3752   // (5) We check whether we've reached our time quota. If we have,
3753   // then we abort.
3754   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3755   if (elapsed_time_ms > _time_target_ms) {
3756     set_has_aborted();
3757     _has_timed_out = true;
3758     statsOnly( ++_aborted_timed_out );
3759     return;
3760   }
3761
3762   // (6) Finally, we check whether there are enough completed SATB
3763   // buffers available for processing. If there are, we abort.
3764   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3765   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3766     if (_cm->verbose_low()) {
3767       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3768                              _worker_id);
3769     }
3770     // We do need to process SATB buffers, so we'll abort and restart
3771     // the marking task to do so.
3772     set_has_aborted();
3773     statsOnly( ++_aborted_satb );
3774     return;
3775   }
3776 }
3777
3778 void CMTask::recalculate_limits() {
3779   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3780   _words_scanned_limit      = _real_words_scanned_limit;
3781
3782   _real_refs_reached_limit  = _refs_reached + refs_reached_period;
3783   _refs_reached_limit       = _real_refs_reached_limit;
3784 }
3785
3786 void CMTask::decrease_limits() {
3787   // This is called when we believe that we're going to do an infrequent
3788   // operation which will increase the per-byte scanned cost (i.e. move
3789   // entries to/from the global stack). It basically tries to decrease the
3790   // scanning limit so that the clock is called earlier.
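  // For instance (illustrative arithmetic only): with a hypothetical
  // words_scanned_period of 12*1024 words, the adjustment below lowers
  // _words_scanned_limit by 9*1024 words, so the next clock call comes
  // up to three quarters of a period sooner than it otherwise would.
  // _refs_reached_limit is lowered in the same proportion.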
3791
3792   if (_cm->verbose_medium()) {
3793     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3794   }
3795
3796   _words_scanned_limit = _real_words_scanned_limit -
3797     3 * words_scanned_period / 4;
3798   _refs_reached_limit  = _real_refs_reached_limit -
3799     3 * refs_reached_period / 4;
3800 }
3801
3802 void CMTask::move_entries_to_global_stack() {
3803   // local array where we'll store the entries that will be popped
3804   // from the local queue
3805   oop buffer[global_stack_transfer_size];
3806
3807   int n = 0;
3808   oop obj;
3809   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3810     buffer[n] = obj;
3811     ++n;
3812   }
3813
3814   if (n > 0) {
3815     // we popped at least one entry from the local queue
3816
3817     statsOnly( ++_global_transfers_to; _local_pops += n );
3818
3819     if (!_cm->mark_stack_push(buffer, n)) {
3820       if (_cm->verbose_low()) {
3821         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3822                                _worker_id);
3823       }
3824       set_has_aborted();
3825     } else {
3826       // the transfer was successful
3827
3828       if (_cm->verbose_medium()) {
3829         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3830                                _worker_id, n);
3831       }
3832       statsOnly( int tmp_size = _cm->mark_stack_size();
3833                  if (tmp_size > _global_max_size) {
3834                    _global_max_size = tmp_size;
3835                  }
3836                  _global_pushes += n );
3837     }
3838   }
3839
3840   // this operation was quite expensive, so decrease the limits
3841   decrease_limits();
3842 }
3843
3844 void CMTask::get_entries_from_global_stack() {
3845   // local array where we'll store the entries that will be popped
3846   // from the global stack.
3847   oop buffer[global_stack_transfer_size];
3848   int n;
3849   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3850   assert(n <= global_stack_transfer_size,
3851          "we should not pop more than the given limit");
3852   if (n > 0) {
3853     // yes, we did actually pop at least one entry
3854
3855     statsOnly( ++_global_transfers_from; _global_pops += n );
3856     if (_cm->verbose_medium()) {
3857       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3858                              _worker_id, n);
3859     }
3860     for (int i = 0; i < n; ++i) {
3861       bool success = _task_queue->push(buffer[i]);
3862       // We only call this when the local queue is empty or under a
3863       // given target limit. So, we do not expect this push to fail.
3864       assert(success, "invariant");
3865     }
3866
3867     statsOnly( int tmp_size = _task_queue->size();
3868                if (tmp_size > _local_max_size) {
3869                  _local_max_size = tmp_size;
3870                }
3871                _local_pushes += n );
3872   }
3873
3874   // this operation was quite expensive, so decrease the limits
3875   decrease_limits();
3876 }
3877
3878 void CMTask::drain_local_queue(bool partially) {
3879   if (has_aborted()) return;
3880
3881   // Decide what the target size is, depending on whether we're going to
3882   // drain it partially (so that other tasks can steal if they run out
3883   // of things to do) or totally (at the very end).
3884   size_t target_size;
3885   if (partially) {
3886     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3887   } else {
3888     target_size = 0;
3889   }
3890
3891   if (_task_queue->size() > target_size) {
3892     if (_cm->verbose_high()) {
3893       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3894                              _worker_id, target_size);
3895     }
3896
3897     oop obj;
3898     bool ret = _task_queue->pop_local(obj);
3899     while (ret) {
3900       statsOnly( ++_local_pops );
3901
3902       if (_cm->verbose_high()) {
3903         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3904                                p2i((void*) obj));
3905       }
3906
3907       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3908       assert(!_g1h->is_on_master_free_list(
3909                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3910
3911       scan_object(obj);
3912
3913       if (_task_queue->size() <= target_size || has_aborted()) {
3914         ret = false;
3915       } else {
3916         ret = _task_queue->pop_local(obj);
3917       }
3918     }
3919
3920     if (_cm->verbose_high()) {
3921       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3922                              _worker_id, _task_queue->size());
3923     }
3924   }
3925 }
3926
3927 void CMTask::drain_global_stack(bool partially) {
3928   if (has_aborted()) return;
3929
3930   // We have a policy to drain the local queue before we attempt to
3931   // drain the global stack.
3932   assert(partially || _task_queue->size() == 0, "invariant");
3933
3934   // Decide what the target size is, depending on whether we're going to
3935   // drain it partially (so that other tasks can steal if they run out
3936   // of things to do) or totally (at the very end). Notice that,
3937   // because we move entries from the global stack in chunks or
3938   // because another task might be doing the same, we might in fact
3939   // drop below the target. But, this is not a problem.
3940   size_t target_size;
3941   if (partially) {
3942     target_size = _cm->partial_mark_stack_size_target();
3943   } else {
3944     target_size = 0;
3945   }
3946
3947   if (_cm->mark_stack_size() > target_size) {
3948     if (_cm->verbose_low()) {
3949       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3950                              _worker_id, target_size);
3951     }
3952
3953     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3954       get_entries_from_global_stack();
3955       drain_local_queue(partially);
3956     }
3957
3958     if (_cm->verbose_low()) {
3959       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3960                              _worker_id, _cm->mark_stack_size());
3961     }
3962   }
3963 }
3964
3965 // The SATB queue has several assumptions on whether to call the par or
3966 // non-par versions of the methods. This is why some of the code is
3967 // replicated. We should really get rid of the single-threaded version
3968 // of the code to simplify things.
3969 void CMTask::drain_satb_buffers() {
3970   if (has_aborted()) return;
3971
3972   // We set this so that the regular clock knows that we're in the
3973   // middle of draining buffers and doesn't set the abort flag when it
3974   // notices that SATB buffers are available for draining. It'd be
3975   // very counterproductive if it did that.
:-) 3976 _draining_satb_buffers = true; 3977 3978 CMObjectClosure oc(this); 3979 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3980 if (G1CollectedHeap::use_parallel_gc_threads()) { 3981 satb_mq_set.set_par_closure(_worker_id, &oc); 3982 } else { 3983 satb_mq_set.set_closure(&oc); 3984 } 3985 3986 // This keeps claiming and applying the closure to completed buffers 3987 // until we run out of buffers or we need to abort. 3988 if (G1CollectedHeap::use_parallel_gc_threads()) { 3989 while (!has_aborted() && 3990 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3991 if (_cm->verbose_medium()) { 3992 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3993 } 3994 statsOnly( ++_satb_buffers_processed ); 3995 regular_clock_call(); 3996 } 3997 } else { 3998 while (!has_aborted() && 3999 satb_mq_set.apply_closure_to_completed_buffer()) { 4000 if (_cm->verbose_medium()) { 4001 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 4002 } 4003 statsOnly( ++_satb_buffers_processed ); 4004 regular_clock_call(); 4005 } 4006 } 4007 4008 _draining_satb_buffers = false; 4009 4010 assert(has_aborted() || 4011 concurrent() || 4012 satb_mq_set.completed_buffers_num() == 0, "invariant"); 4013 4014 if (G1CollectedHeap::use_parallel_gc_threads()) { 4015 satb_mq_set.set_par_closure(_worker_id, NULL); 4016 } else { 4017 satb_mq_set.set_closure(NULL); 4018 } 4019 4020 // again, this was a potentially expensive operation, decrease the 4021 // limits to get the regular clock call early 4022 decrease_limits(); 4023 } 4024 4025 void CMTask::print_stats() { 4026 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 4027 _worker_id, _calls); 4028 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 4029 _elapsed_time_ms, _termination_time_ms); 4030 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4031 _step_times_ms.num(), _step_times_ms.avg(), 4032 _step_times_ms.sd()); 4033 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4034 _step_times_ms.maximum(), _step_times_ms.sum()); 4035 4036 #if _MARKING_STATS_ 4037 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4038 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 4039 _all_clock_intervals_ms.sd()); 4040 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4041 _all_clock_intervals_ms.maximum(), 4042 _all_clock_intervals_ms.sum()); 4043 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 4044 _clock_due_to_scanning, _clock_due_to_marking); 4045 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 4046 _objs_scanned, _objs_found_on_bitmap); 4047 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 4048 _local_pushes, _local_pops, _local_max_size); 4049 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 4050 _global_pushes, _global_pops, _global_max_size); 4051 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 4052 _global_transfers_to,_global_transfers_from); 4053 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 4054 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 4055 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 4056 _steal_attempts, _steals); 4057 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 4058 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 
4059                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4060   gclog_or_tty->print_cr("      time out: %d, SATB: %d, termination: %d",
4061                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4062 #endif // _MARKING_STATS_
4063 }
4064
4065 /*****************************************************************************
4066
4067     The do_marking_step(time_target_ms, ...) method is the building
4068     block of the parallel marking framework. It can be called in parallel
4069     with other invocations of do_marking_step() on different tasks
4070     (but only one per task, obviously) and concurrently with the
4071     mutator threads, or during remark, hence it eliminates the need
4072     for two versions of the code. When called during remark, it will
4073     pick up from where the task left off during the concurrent marking
4074     phase. Interestingly, tasks are also claimable during evacuation
4075     pauses, since do_marking_step() ensures that it aborts before
4076     it needs to yield.
4077
4078     The data structures that it uses to do marking work are the
4079     following:
4080
4081     (1) Marking Bitmap. If there are gray objects that appear only
4082     on the bitmap (this happens either when dealing with an overflow
4083     or when the initial marking phase has simply marked the roots
4084     and didn't push them on the stack), then tasks claim heap
4085     regions whose bitmap they then scan to find gray objects. A
4086     global finger indicates where the end of the last claimed region
4087     is. A local finger indicates how far into the region a task has
4088     scanned. The two fingers are used to determine how to gray an
4089     object (i.e. whether simply marking it is OK, as it will be
4090     visited by a task in the future, or whether it also needs to be
4091     pushed on a stack).
4092
4093     (2) Local Queue. Each task has a local queue, which it can
4094     access reasonably efficiently. Other tasks can steal from
4095     it when they run out of work. Throughout the marking phase, a
4096     task attempts to keep its local queue short but not totally
4097     empty, so that entries are available for stealing by other
4098     tasks. Only when there is no more work will a task totally
4099     drain its local queue.
4100
4101     (3) Global Mark Stack. This handles local queue overflow. During
4102     marking only sets of entries are moved between it and the local
4103     queues, as access to it requires a mutex and more fine-grained
4104     interaction with it might cause contention. If it
4105     overflows, then the marking phase should restart and iterate
4106     over the bitmap to identify gray objects. Throughout the marking
4107     phase, tasks attempt to keep the global mark stack at a small
4108     length but not totally empty, so that entries are available for
4109     popping by other tasks. Only when there is no more work will
4110     tasks totally drain the global mark stack.
4111
4112     (4) SATB Buffer Queue. This is where completed SATB buffers are
4113     made available. Buffers are regularly removed from this queue
4114     and scanned for roots, so that the queue doesn't get too
4115     long. During remark, all completed buffers are processed, as
4116     well as the filled-in parts of any uncompleted buffers.
4117
4118     The do_marking_step() method tries to abort when the time target
4119     has been reached. There are a few other cases when the
4120     do_marking_step() method also aborts:
4121
4122     (1) When the marking phase has been aborted (after a Full GC).
4123
4124     (2) When a global overflow (on the global stack) has been
4125     triggered. Before the task aborts, it will actually sync up with
4126     the other tasks to ensure that all the marking data structures
4127     (local queues, stacks, fingers etc.) are re-initialized so that
4128     when do_marking_step() completes, the marking phase can
4129     immediately restart.
4130
4131     (3) When enough completed SATB buffers are available. The
4132     do_marking_step() method only tries to drain SATB buffers right
4133     at the beginning. So, if enough buffers are available, the
4134     marking step aborts and the SATB buffers are processed at
4135     the beginning of the next invocation.
4136
4137     (4) To yield. When we have to yield, we abort and yield
4138     right at the end of do_marking_step(). This saves us a lot
4139     of hassle as, by yielding, we might allow a Full GC. If this
4140     happens then objects will be compacted underneath our feet, the
4141     heap might shrink, etc. We avoid having to check for all this
4142     by just aborting and doing the yield right at the end.
4143
4144     From the above it follows that the do_marking_step() method should
4145     be called in a loop (or, otherwise, regularly) until it completes.
4146
4147     If a marking step completes without its has_aborted() flag being
4148     true, it means it has completed the current marking phase (and
4149     also that all other marking tasks have done so and have all synced up).
4150
4151     A method called regular_clock_call() is invoked "regularly" (in
4152     sub-millisecond intervals) throughout marking. It is this clock method
4153     that checks all the abort conditions which were mentioned above and
4154     decides when the task should abort. A work-based scheme is used to
4155     trigger this clock method: it fires when the number of object words the
4156     marking phase has scanned or the number of references the marking
4157     phase has visited reaches a given limit. Additional invocations of
4158     the clock method have been planted in a few other strategic places
4159     too. The initial reason for the clock method was to avoid calling
4160     vtime too regularly, as it is quite expensive. So, once it was in
4161     place, it was natural to piggy-back all the other conditions on it
4162     too and not constantly check them throughout the code.
4163
4164     If do_termination is true then do_marking_step will enter its
4165     termination protocol.
4166
4167     The value of is_serial must be true when do_marking_step is being
4168     called serially (i.e. by the VMThread) and do_marking_step should
4169     skip any synchronization in the termination and overflow code.
4170     Examples include the serial remark code and the serial reference
4171     processing closures.
4172
4173     The value of is_serial must be false when do_marking_step is
4174     being called by any of the worker threads in a work gang.
4175     Examples include the concurrent marking code (CMMarkingTask),
4176     the MT remark code, and the MT reference processing closures.
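
    To summarize, a caller drives do_marking_step() in a retry loop
    until it completes. A minimal sketch of such a driver (illustrative
    only; 'task', 'cm' and 'target_ms' stand in for the caller's actual
    task pointer, ConcurrentMark instance and chosen time slice):

      do {
        task->do_marking_step(target_ms,
                              true  /* do_termination */,
                              false /* is_serial */);
      } while (task->has_aborted() && !cm->has_aborted());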
4177
4178 *****************************************************************************/
4179
4180 void CMTask::do_marking_step(double time_target_ms,
4181                              bool do_termination,
4182                              bool is_serial) {
4183   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4184   assert(concurrent() == _cm->concurrent(), "they should be the same");
4185
4186   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4187   assert(_task_queues != NULL, "invariant");
4188   assert(_task_queue != NULL, "invariant");
4189   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4190
4191   assert(!_claimed,
4192          "only one thread should claim this task at any one time");
4193
4194   // OK, this doesn't safeguard against all possible scenarios, as it is
4195   // possible for two threads to set the _claimed flag at the same
4196   // time. But it is only for debugging purposes anyway and it will
4197   // catch most problems.
4198   _claimed = true;
4199
4200   _start_time_ms = os::elapsedVTime() * 1000.0;
4201   statsOnly( _interval_start_time_ms = _start_time_ms );
4202
4203   // If do_stealing is true then do_marking_step will attempt to
4204   // steal work from the other CMTasks. It only makes sense to
4205   // enable stealing when the termination protocol is enabled
4206   // and do_marking_step() is not being called serially.
4207   bool do_stealing = do_termination && !is_serial;
4208
4209   double diff_prediction_ms =
4210     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4211   _time_target_ms = time_target_ms - diff_prediction_ms;
4212
4213   // set up the variables that are used in the work-based scheme to
4214   // call the regular clock method
4215   _words_scanned = 0;
4216   _refs_reached  = 0;
4217   recalculate_limits();
4218
4219   // clear all flags
4220   clear_has_aborted();
4221   _has_timed_out = false;
4222   _draining_satb_buffers = false;
4223
4224   ++_calls;
4225
4226   if (_cm->verbose_low()) {
4227     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4228                            "target = %1.2lfms >>>>>>>>>>",
4229                            _worker_id, _calls, _time_target_ms);
4230   }
4231
4232   // Set up the bitmap and oop closures. Anything that uses them is
4233   // eventually called from this method, so it is OK to allocate these
4234   // statically.
4235   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4236   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4237   set_cm_oop_closure(&cm_oop_closure);
4238
4239   if (_cm->has_overflown()) {
4240     // This can happen if the mark stack overflows during a GC pause
4241     // and this task, after a yield point, restarts. We have to abort
4242     // as we need to get into the overflow protocol which happens
4243     // right at the end of this task.
4244     set_has_aborted();
4245   }
4246
4247   // First drain any available SATB buffers. After this, we will not
4248   // look at SATB buffers before the next invocation of this method.
4249   // If enough completed SATB buffers are queued up, the regular clock
4250   // will abort this task so that it restarts.
4251   drain_satb_buffers();
4252   // ...then partially drain the local queue and the global stack
4253   drain_local_queue(true);
4254   drain_global_stack(true);
4255
4256   do {
4257     if (!has_aborted() && _curr_region != NULL) {
4258       // This means that we're already holding on to a region.
4259       assert(_finger != NULL, "if region is not NULL, then the finger "
4260              "should not be NULL either");
4261
4262       // We might have restarted this task after an evacuation pause
4263       // which might have evacuated the region we're holding on to
4264       // underneath our feet.
Let's read its limit again to make sure 4265 // that we do not iterate over a region of the heap that 4266 // contains garbage (update_region_limit() will also move 4267 // _finger to the start of the region if it is found empty). 4268 update_region_limit(); 4269 // We will start from _finger not from the start of the region, 4270 // as we might be restarting this task after aborting half-way 4271 // through scanning this region. In this case, _finger points to 4272 // the address where we last found a marked object. If this is a 4273 // fresh region, _finger points to start(). 4274 MemRegion mr = MemRegion(_finger, _region_limit); 4275 4276 if (_cm->verbose_low()) { 4277 gclog_or_tty->print_cr("[%u] we're scanning part " 4278 "["PTR_FORMAT", "PTR_FORMAT") " 4279 "of region "HR_FORMAT, 4280 _worker_id, p2i(_finger), p2i(_region_limit), 4281 HR_FORMAT_PARAMS(_curr_region)); 4282 } 4283 4284 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4285 "humongous regions should go around loop once only"); 4286 4287 // Some special cases: 4288 // If the memory region is empty, we can just give up the region. 4289 // If the current region is humongous then we only need to check 4290 // the bitmap for the bit associated with the start of the object, 4291 // scan the object if it's live, and give up the region. 4292 // Otherwise, let's iterate over the bitmap of the part of the region 4293 // that is left. 4294 // If the iteration is successful, give up the region. 4295 if (mr.is_empty()) { 4296 giveup_current_region(); 4297 regular_clock_call(); 4298 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4299 if (_nextMarkBitMap->isMarked(mr.start())) { 4300 // The object is marked - apply the closure 4301 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4302 bitmap_closure.do_bit(offset); 4303 } 4304 // Even if this task aborted while scanning the humongous object 4305 // we can (and should) give up the current region. 4306 giveup_current_region(); 4307 regular_clock_call(); 4308 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4309 giveup_current_region(); 4310 regular_clock_call(); 4311 } else { 4312 assert(has_aborted(), "currently the only way to do so"); 4313 // The only way to abort the bitmap iteration is to return 4314 // false from the do_bit() method. However, inside the 4315 // do_bit() method we move the _finger to point to the 4316 // object currently being looked at. So, if we bail out, we 4317 // have definitely set _finger to something non-null. 4318 assert(_finger != NULL, "invariant"); 4319 4320 // Region iteration was actually aborted. So now _finger 4321 // points to the address of the object we last scanned. If we 4322 // leave it there, when we restart this task, we will rescan 4323 // the object. It is easy to avoid this. We move the finger by 4324 // enough to point to the next possible object header (the 4325 // bitmap knows by how much we need to move it as it knows its 4326 // granularity). 4327 assert(_finger < _region_limit, "invariant"); 4328 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4329 // Check if bitmap iteration was aborted while scanning the last object 4330 if (new_finger >= _region_limit) { 4331 giveup_current_region(); 4332 } else { 4333 move_finger_to(new_finger); 4334 } 4335 } 4336 } 4337 // At this point we have either completed iterating over the 4338 // region we were holding on to, or we have aborted. 
4339
4340     // We then partially drain the local queue and the global stack.
4341     // (Do we really need this?)
4342     drain_local_queue(true);
4343     drain_global_stack(true);
4344
4345     // Read the note on the claim_region() method about why it might
4346     // return NULL with potentially more regions available for
4347     // claiming and why we have to check out_of_regions() to determine
4348     // whether we're done or not.
4349     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4350       // We are going to try to claim a new region. We should have
4351       // given up on the previous one.
4352       // Separated the asserts so that we know which one fires.
4353       assert(_curr_region  == NULL, "invariant");
4354       assert(_finger       == NULL, "invariant");
4355       assert(_region_limit == NULL, "invariant");
4356       if (_cm->verbose_low()) {
4357         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4358       }
4359       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4360       if (claimed_region != NULL) {
4361         // Yes, we managed to claim one
4362         statsOnly( ++_regions_claimed );
4363
4364         if (_cm->verbose_low()) {
4365           gclog_or_tty->print_cr("[%u] we successfully claimed "
4366                                  "region "PTR_FORMAT,
4367                                  _worker_id, p2i(claimed_region));
4368         }
4369
4370         setup_for_region(claimed_region);
4371         assert(_curr_region == claimed_region, "invariant");
4372       }
4373       // It is important to call the regular clock here. It might take
4374       // a while to claim a region if, for example, we hit a large
4375       // block of empty regions. So we need to call the regular clock
4376       // method once round the loop to make sure it's called
4377       // frequently enough.
4378       regular_clock_call();
4379     }
4380
4381     if (!has_aborted() && _curr_region == NULL) {
4382       assert(_cm->out_of_regions(),
4383              "at this point we should be out of regions");
4384     }
4385   } while (_curr_region != NULL && !has_aborted());
4386
4387   if (!has_aborted()) {
4388     // We cannot check whether the global stack is empty, since other
4389     // tasks might be pushing objects to it concurrently.
4390     assert(_cm->out_of_regions(),
4391            "at this point we should be out of regions");
4392
4393     if (_cm->verbose_low()) {
4394       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4395     }
4396
4397     // Try to reduce the number of available SATB buffers so that
4398     // remark has less work to do.
4399     drain_satb_buffers();
4400   }
4401
4402   // Since we've done everything else, we can now totally drain the
4403   // local queue and global stack.
4404   drain_local_queue(false);
4405   drain_global_stack(false);
4406
4407   // Attempt to steal work from other tasks' queues.
4408   if (do_stealing && !has_aborted()) {
4409     // We have not aborted. This means that we have finished all that
4410     // we could. Let's try to do some stealing...
4411
4412     // We cannot check whether the global stack is empty, since other
4413     // tasks might be pushing objects to it concurrently.
4414 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4415 "only way to reach here"); 4416 4417 if (_cm->verbose_low()) { 4418 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4419 } 4420 4421 while (!has_aborted()) { 4422 oop obj; 4423 statsOnly( ++_steal_attempts ); 4424 4425 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4426 if (_cm->verbose_medium()) { 4427 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4428 _worker_id, p2i((void*) obj)); 4429 } 4430 4431 statsOnly( ++_steals ); 4432 4433 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4434 "any stolen object should be marked"); 4435 scan_object(obj); 4436 4437 // And since we're towards the end, let's totally drain the 4438 // local queue and global stack. 4439 drain_local_queue(false); 4440 drain_global_stack(false); 4441 } else { 4442 break; 4443 } 4444 } 4445 } 4446 4447 // If we are about to wrap up and go into termination, check if we 4448 // should raise the overflow flag. 4449 if (do_termination && !has_aborted()) { 4450 if (_cm->force_overflow()->should_force()) { 4451 _cm->set_has_overflown(); 4452 regular_clock_call(); 4453 } 4454 } 4455 4456 // We still haven't aborted. Now, let's try to get into the 4457 // termination protocol. 4458 if (do_termination && !has_aborted()) { 4459 // We cannot check whether the global stack is empty, since other 4460 // tasks might be concurrently pushing objects on it. 4461 // Separated the asserts so that we know which one fires. 4462 assert(_cm->out_of_regions(), "only way to reach here"); 4463 assert(_task_queue->size() == 0, "only way to reach here"); 4464 4465 if (_cm->verbose_low()) { 4466 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4467 } 4468 4469 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4470 4471 // The CMTask class also extends the TerminatorTerminator class, 4472 // hence its should_exit_termination() method will also decide 4473 // whether to exit the termination protocol or not. 4474 bool finished = (is_serial || 4475 _cm->terminator()->offer_termination(this)); 4476 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4477 _termination_time_ms += 4478 termination_end_time_ms - _termination_start_time_ms; 4479 4480 if (finished) { 4481 // We're all done. 4482 4483 if (_worker_id == 0) { 4484 // let's allow task 0 to do this 4485 if (concurrent()) { 4486 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4487 // we need to set this to false before the next 4488 // safepoint. This way we ensure that the marking phase 4489 // doesn't observe any more heap expansions. 4490 _cm->clear_concurrent_marking_in_progress(); 4491 } 4492 } 4493 4494 // We can now guarantee that the global stack is empty, since 4495 // all other tasks have finished. We separated the guarantees so 4496 // that, if a condition is false, we can immediately find out 4497 // which one. 4498 guarantee(_cm->out_of_regions(), "only way to reach here"); 4499 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4500 guarantee(_task_queue->size() == 0, "only way to reach here"); 4501 guarantee(!_cm->has_overflown(), "only way to reach here"); 4502 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4503 4504 if (_cm->verbose_low()) { 4505 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4506 } 4507 } else { 4508 // Apparently there's more work to do. Let's abort this task. It 4509 // will restart it and we can hopefully find more things to do. 
4510 4511 if (_cm->verbose_low()) { 4512 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4513 _worker_id); 4514 } 4515 4516 set_has_aborted(); 4517 statsOnly( ++_aborted_termination ); 4518 } 4519 } 4520 4521 // Mainly for debugging purposes to make sure that a pointer to the 4522 // closure which was statically allocated in this frame doesn't 4523 // escape it by accident. 4524 set_cm_oop_closure(NULL); 4525 double end_time_ms = os::elapsedVTime() * 1000.0; 4526 double elapsed_time_ms = end_time_ms - _start_time_ms; 4527 // Update the step history. 4528 _step_times_ms.add(elapsed_time_ms); 4529 4530 if (has_aborted()) { 4531 // The task was aborted for some reason. 4532 4533 statsOnly( ++_aborted ); 4534 4535 if (_has_timed_out) { 4536 double diff_ms = elapsed_time_ms - _time_target_ms; 4537 // Keep statistics of how well we did with respect to hitting 4538 // our target only if we actually timed out (if we aborted for 4539 // other reasons, then the results might get skewed). 4540 _marking_step_diffs_ms.add(diff_ms); 4541 } 4542 4543 if (_cm->has_overflown()) { 4544 // This is the interesting one. We aborted because a global 4545 // overflow was raised. This means we have to restart the 4546 // marking phase and start iterating over regions. However, in 4547 // order to do this we have to make sure that all tasks stop 4548 // what they are doing and re-initialize in a safe manner. We 4549 // will achieve this with the use of two barrier sync points. 4550 4551 if (_cm->verbose_low()) { 4552 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4553 } 4554 4555 if (!is_serial) { 4556 // We only need to enter the sync barrier if being called 4557 // from a parallel context 4558 _cm->enter_first_sync_barrier(_worker_id); 4559 4560 // When we exit this sync barrier we know that all tasks have 4561 // stopped doing marking work. So, it's now safe to 4562 // re-initialize our data structures. At the end of this method, 4563 // task 0 will clear the global data structures. 4564 } 4565 4566 statsOnly( ++_aborted_overflow ); 4567 4568 // We clear the local state of this task... 4569 clear_region_fields(); 4570 4571 if (!is_serial) { 4572 // ...and enter the second barrier. 4573 _cm->enter_second_sync_barrier(_worker_id); 4574 } 4575 // At this point, if we're during the concurrent phase of 4576 // marking, everything has been re-initialized and we're 4577 // ready to restart. 
4578 } 4579 4580 if (_cm->verbose_low()) { 4581 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4582 "elapsed = %1.2lfms <<<<<<<<<<", 4583 _worker_id, _time_target_ms, elapsed_time_ms); 4584 if (_cm->has_aborted()) { 4585 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4586 _worker_id); 4587 } 4588 } 4589 } else { 4590 if (_cm->verbose_low()) { 4591 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4592 "elapsed = %1.2lfms <<<<<<<<<<", 4593 _worker_id, _time_target_ms, elapsed_time_ms); 4594 } 4595 } 4596 4597 _claimed = false; 4598 } 4599 4600 CMTask::CMTask(uint worker_id, 4601 ConcurrentMark* cm, 4602 size_t* marked_bytes, 4603 BitMap* card_bm, 4604 CMTaskQueue* task_queue, 4605 CMTaskQueueSet* task_queues) 4606 : _g1h(G1CollectedHeap::heap()), 4607 _worker_id(worker_id), _cm(cm), 4608 _claimed(false), 4609 _nextMarkBitMap(NULL), _hash_seed(17), 4610 _task_queue(task_queue), 4611 _task_queues(task_queues), 4612 _cm_oop_closure(NULL), 4613 _marked_bytes_array(marked_bytes), 4614 _card_bm(card_bm) { 4615 guarantee(task_queue != NULL, "invariant"); 4616 guarantee(task_queues != NULL, "invariant"); 4617 4618 statsOnly( _clock_due_to_scanning = 0; 4619 _clock_due_to_marking = 0 ); 4620 4621 _marking_step_diffs_ms.add(0.5); 4622 } 4623 4624 // These are formatting macros that are used below to ensure 4625 // consistent formatting. The *_H_* versions are used to format the 4626 // header for a particular value and they should be kept consistent 4627 // with the corresponding macro. Also note that most of the macros add 4628 // the necessary white space (as a prefix) which makes them a bit 4629 // easier to compose. 4630 4631 // All the output lines are prefixed with this string to be able to 4632 // identify them easily in a large log file. 4633 #define G1PPRL_LINE_PREFIX "###" 4634 4635 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4636 #ifdef _LP64 4637 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4638 #else // _LP64 4639 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4640 #endif // _LP64 4641 4642 // For per-region info 4643 #define G1PPRL_TYPE_FORMAT " %-4s" 4644 #define G1PPRL_TYPE_H_FORMAT " %4s" 4645 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4646 #define G1PPRL_BYTE_H_FORMAT " %9s" 4647 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4648 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4649 4650 // For summary info 4651 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4652 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4653 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4654 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4655 4656 G1PrintRegionLivenessInfoClosure:: 4657 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4658 : _out(out), 4659 _total_used_bytes(0), _total_capacity_bytes(0), 4660 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4661 _hum_used_bytes(0), _hum_capacity_bytes(0), 4662 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4663 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4664 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4665 MemRegion g1_committed = g1h->g1_committed(); 4666 MemRegion g1_reserved = g1h->g1_reserved(); 4667 double now = os::elapsedTime(); 4668 4669 // Print the header of the output. 
4670   _out->cr();
4671   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4672   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4673                  G1PPRL_SUM_ADDR_FORMAT("committed")
4674                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4675                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4676                  p2i(g1_committed.start()), p2i(g1_committed.end()),
4677                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4678                  HeapRegion::GrainBytes);
4679   _out->print_cr(G1PPRL_LINE_PREFIX);
4680   _out->print_cr(G1PPRL_LINE_PREFIX
4681                  G1PPRL_TYPE_H_FORMAT
4682                  G1PPRL_ADDR_BASE_H_FORMAT
4683                  G1PPRL_BYTE_H_FORMAT
4684                  G1PPRL_BYTE_H_FORMAT
4685                  G1PPRL_BYTE_H_FORMAT
4686                  G1PPRL_DOUBLE_H_FORMAT
4687                  G1PPRL_BYTE_H_FORMAT
4688                  G1PPRL_BYTE_H_FORMAT,
4689                  "type", "address-range",
4690                  "used", "prev-live", "next-live", "gc-eff",
4691                  "remset", "code-roots");
4692   _out->print_cr(G1PPRL_LINE_PREFIX
4693                  G1PPRL_TYPE_H_FORMAT
4694                  G1PPRL_ADDR_BASE_H_FORMAT
4695                  G1PPRL_BYTE_H_FORMAT
4696                  G1PPRL_BYTE_H_FORMAT
4697                  G1PPRL_BYTE_H_FORMAT
4698                  G1PPRL_DOUBLE_H_FORMAT
4699                  G1PPRL_BYTE_H_FORMAT
4700                  G1PPRL_BYTE_H_FORMAT,
4701                  "", "",
4702                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4703                  "(bytes)", "(bytes)");
4704 }
4705
4706 // It takes a reference to one of the _hum_* fields as a parameter,
4707 // deduces the corresponding value for a region in a humongous region
4708 // series (either the region size, or what's left if the _hum_* field
4709 // is < the region size), and updates the _hum_* field accordingly.
4710 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4711   size_t bytes = 0;
4712   // The > 0 check is to deal with the prev and next live bytes which
4713   // could be 0.
4714   if (*hum_bytes > 0) {
4715     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4716     *hum_bytes -= bytes;
4717   }
4718   return bytes;
4719 }
4720
4721 // It deduces the values for a region in a humongous region series
4722 // from the _hum_* fields and updates those accordingly. It assumes
4723 // that the _hum_* fields have already been set up from the "starts
4724 // humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  size_t remset_bytes = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes = capacity_bytes;
    _hum_used_bytes = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    // Clip the printed address range to this region: for a "starts
    // humongous" region, end() extends to the end of the whole series.
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes += used_bytes;
  _total_capacity_bytes += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, p2i(bottom), p2i(end),
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                 remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add the static memory usage of the remembered set structures to the
  // remembered set total.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() +
                         HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
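  // For illustration only (values invented), the summary line printed
  // below comes out roughly as:
  //
  //   ### SUMMARY capacity: 2048.00 MB used: 1024.00 MB / 50.00 % ...
  //
  // where each percentage relates the corresponding total to
  // _total_capacity_bytes.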
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}
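
// A sketch of how this closure is typically driven (the actual call site,
// guarded by the G1PrintRegionLivenessInfo flag, lives elsewhere):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the header, doHeapRegion() prints one line per
// region (returning false so that the iteration continues), and the
// destructor prints the summary footer.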