/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
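  // The bitmap keeps one bit per (HeapWordSize << _shifter) bytes, i.e.
  // one bit per possible object-start boundary, so an arbitrary addr may
  // fall in the middle of a bit's span. Rounding up maps addr onto the
  // next boundary before it is converted into a bit offset. For example,
  // with 8-byte heap words and _shifter == 0, each bit covers one word
  // and addr is simply rounded up to the next word boundary.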
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size() >> LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
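  // Rough sizing, assuming 8-byte heap words and MinObjAlignment of one
  // word (so _shifter == 0): the bitmap needs one bit per heap word,
  // i.e. 1/64th of the heap size in bytes. A 4 GB heap would therefore
  // commit a 64 MB bitmap here, and G1 maintains two such bitmaps
  // (previous and next).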
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  // MemRegion::intersection() returns the clipped region; assign the
  // result, otherwise mr is left unclipped.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
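  // Remark is a stop-the-world pause and the stack has already been
  // drained by this point, so no concurrent push can race with the
  // reallocation below.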
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
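  // Unlike par_push() and par_adjoin_arr() above, no CAS retry loop is
  // needed here: pushes through this path are serialized by
  // ParGCRareEvent_lock, so a plain bump of _index suffices.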
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
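  // Double-checked locking: do a racy read of _next_survivor first and
  // only take RootRegionScan_lock if there still appears to be a region
  // to claim; the read is then repeated under the lock.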
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(log2_intptr(MinObjAlignment)),
  _markBitMap2(log2_intptr(MinObjAlignment)),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                          (double) os::processor_count();
      double sleep_factor =
                     (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
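      // scale_parallel_threads() returns MAX2((n + 2) / 4, 1U), so, for
      // example, ParallelGCThreads values of 1..5 give one marking
      // thread, 6..9 give two, and 10..13 give three.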
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
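    // Only values supplied explicitly on the command line are validated
    // below; an ergonomic default has already been clamped and verified
    // in the branch above.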
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                              CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. Any future expansions of g1_committed that CM is notified
  // of will happen at the end of evacuation pauses, when tasks are
  // inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  // If the barrier aborted we don't need to reset the marking state here
  // since ConcurrentMark::abort() did that for us and we will now ignore
  // the overflow condition and just abort the whole marking phase.
  if (!barrier_aborted && concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
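  // Each worker claims one root region at a time through
  // CMRootRegions::claim_next() (see CMRootRegionScanTask above), so the
  // survivor regions are distributed dynamically among the workers.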
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
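// In outline, doHeapRegion() below walks the marking bitmap from the
// region's bottom up to its NTAMS; for every marked object it adds the
// object's size to the marked-byte total and sets the bits for the
// cards the object spans, and the [NTAMS, top) range is then treated
// as implicitly live. The results populate the "expected" bitmaps that
// VerifyLiveObjectDataHRClosure (below) compares against the actual
// ones.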
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
1588 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1589 1590 bool expected = _exp_region_bm->at(index); 1591 bool actual = _region_bm->at(index); 1592 if (expected && !actual) { 1593 if (_verbose) { 1594 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1595 "expected: %s, actual: %s", 1596 hr->hrs_index(), 1597 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1598 } 1599 failures += 1; 1600 } 1601 1602 // Verify that the card bit maps for the cards spanned by the current 1603 // region match. We have an error if we have a set bit in the expected 1604 // bit map and the corresponding bit in the actual bitmap is not set. 1605 1606 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1607 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1608 1609 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1610 expected = _exp_card_bm->at(i); 1611 actual = _card_bm->at(i); 1612 1613 if (expected && !actual) { 1614 if (_verbose) { 1615 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1616 "expected: %s, actual: %s", 1617 hr->hrs_index(), i, 1618 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1619 } 1620 failures += 1; 1621 } 1622 } 1623 1624 if (failures > 0 && _verbose) { 1625 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1626 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1627 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(), 1628 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1629 } 1630 1631 _failures += failures; 1632 1633 // We could stop iteration over the heap when we 1634 // find the first violating region by returning true. 1635 return false; 1636 } 1637 }; 1638 1639 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1640 protected: 1641 G1CollectedHeap* _g1h; 1642 ConcurrentMark* _cm; 1643 BitMap* _actual_region_bm; 1644 BitMap* _actual_card_bm; 1645 1646 uint _n_workers; 1647 1648 BitMap* _expected_region_bm; 1649 BitMap* _expected_card_bm; 1650 1651 int _failures; 1652 bool _verbose; 1653 1654 public: 1655 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1656 BitMap* region_bm, BitMap* card_bm, 1657 BitMap* expected_region_bm, BitMap* expected_card_bm) 1658 : AbstractGangTask("G1 verify final counting"), 1659 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1660 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1661 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1662 _failures(0), _verbose(false), 1663 _n_workers(0) { 1664 assert(VerifyDuringGC, "don't call this otherwise"); 1665 1666 // Use the value already set as the number of active threads 1667 // in the call to run_task(). 
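// (ConcurrentMark::cleanup() establishes that value via
// G1CollectedHeap::set_par_threads() before invoking run_task() on
// this task.)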
1668 if (G1CollectedHeap::use_parallel_gc_threads()) { 1669 assert( _g1h->workers()->active_workers() > 0, 1670 "Should have been previously set"); 1671 _n_workers = _g1h->workers()->active_workers(); 1672 } else { 1673 _n_workers = 1; 1674 } 1675 1676 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1677 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1678 1679 _verbose = _cm->verbose_medium(); 1680 } 1681 1682 void work(uint worker_id) { 1683 assert(worker_id < _n_workers, "invariant"); 1684 1685 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1686 _actual_region_bm, _actual_card_bm, 1687 _expected_region_bm, 1688 _expected_card_bm, 1689 _verbose); 1690 1691 if (G1CollectedHeap::use_parallel_gc_threads()) { 1692 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1693 worker_id, 1694 _n_workers, 1695 HeapRegion::VerifyCountClaimValue); 1696 } else { 1697 _g1h->heap_region_iterate(&verify_cl); 1698 } 1699 1700 Atomic::add(verify_cl.failures(), &_failures); 1701 } 1702 1703 int failures() const { return _failures; } 1704 }; 1705 1706 // Closure that finalizes the liveness counting data. 1707 // Used during the cleanup pause. 1708 // Sets the bits corresponding to the interval [NTAMS, top] 1709 // (which contains the implicitly live objects) in the 1710 // card liveness bitmap. Also sets the bit for each region, 1711 // containing live data, in the region liveness bitmap. 1712 1713 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1714 public: 1715 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1716 BitMap* region_bm, 1717 BitMap* card_bm) : 1718 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1719 1720 bool doHeapRegion(HeapRegion* hr) { 1721 1722 if (hr->continuesHumongous()) { 1723 // We will ignore these here and process them when their 1724 // associated "starts humongous" region is processed (see 1725 // set_bit_for_heap_region()). Note that we cannot rely on their 1726 // associated "starts humongous" region to have their bit set to 1727 // 1 since, due to the region chunking in the parallel region 1728 // iteration, a "continues humongous" region might be visited 1729 // before its associated "starts humongous". 1730 return false; 1731 } 1732 1733 HeapWord* ntams = hr->next_top_at_mark_start(); 1734 HeapWord* top = hr->top(); 1735 1736 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1737 1738 // Mark the allocated-since-marking portion... 1739 if (ntams < top) { 1740 // This definitely means the region has live objects. 1741 set_bit_for_region(hr); 1742 1743 // Now set the bits in the card bitmap for [ntams, top) 1744 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1745 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1746 1747 // Note: if we're looking at the last region in heap - top 1748 // could be actually just beyond the end of the heap; end_idx 1749 // will then correspond to a (non-existent) card that is also 1750 // just beyond the heap. 
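// Illustrative example (hypothetical offsets, assuming the usual
// 512 byte cards and card indices taken from the start of the heap):
// with ntams at byte offset 0x800 and top at byte offset 0xa01,
// card_bitmap_index_for() yields start_idx == 4 and end_idx == 5;
// because top is not card aligned, end_idx is bumped to 6 below so that
// the half-covered card is included in the range [start_idx, end_idx).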
1751 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1752 // end of object is not card aligned - increment to cover 1753 // all the cards spanned by the object 1754 end_idx += 1; 1755 } 1756 1757 assert(end_idx <= _card_bm->size(), 1758 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1759 end_idx, _card_bm->size())); 1760 assert(start_idx < _card_bm->size(), 1761 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1762 start_idx, _card_bm->size())); 1763 1764 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1765 } 1766 1767 // Set the bit for the region if it contains live data 1768 if (hr->next_marked_bytes() > 0) { 1769 set_bit_for_region(hr); 1770 } 1771 1772 return false; 1773 } 1774 }; 1775 1776 class G1ParFinalCountTask: public AbstractGangTask { 1777 protected: 1778 G1CollectedHeap* _g1h; 1779 ConcurrentMark* _cm; 1780 BitMap* _actual_region_bm; 1781 BitMap* _actual_card_bm; 1782 1783 uint _n_workers; 1784 1785 public: 1786 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1787 : AbstractGangTask("G1 final counting"), 1788 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1789 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1790 _n_workers(0) { 1791 // Use the value already set as the number of active threads 1792 // in the call to run_task(). 1793 if (G1CollectedHeap::use_parallel_gc_threads()) { 1794 assert( _g1h->workers()->active_workers() > 0, 1795 "Should have been previously set"); 1796 _n_workers = _g1h->workers()->active_workers(); 1797 } else { 1798 _n_workers = 1; 1799 } 1800 } 1801 1802 void work(uint worker_id) { 1803 assert(worker_id < _n_workers, "invariant"); 1804 1805 FinalCountDataUpdateClosure final_update_cl(_g1h, 1806 _actual_region_bm, 1807 _actual_card_bm); 1808 1809 if (G1CollectedHeap::use_parallel_gc_threads()) { 1810 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1811 worker_id, 1812 _n_workers, 1813 HeapRegion::FinalCountClaimValue); 1814 } else { 1815 _g1h->heap_region_iterate(&final_update_cl); 1816 } 1817 } 1818 }; 1819 1820 class G1ParNoteEndTask; 1821 1822 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1823 G1CollectedHeap* _g1; 1824 size_t _max_live_bytes; 1825 uint _regions_claimed; 1826 size_t _freed_bytes; 1827 FreeRegionList* _local_cleanup_list; 1828 HeapRegionSetCount _old_regions_removed; 1829 HeapRegionSetCount _humongous_regions_removed; 1830 HRRSCleanupTask* _hrrs_cleanup_task; 1831 double _claimed_region_time; 1832 double _max_region_time; 1833 1834 public: 1835 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1836 FreeRegionList* local_cleanup_list, 1837 HRRSCleanupTask* hrrs_cleanup_task) : 1838 _g1(g1), 1839 _max_live_bytes(0), _regions_claimed(0), 1840 _freed_bytes(0), 1841 _claimed_region_time(0.0), _max_region_time(0.0), 1842 _local_cleanup_list(local_cleanup_list), 1843 _old_regions_removed(), 1844 _humongous_regions_removed(), 1845 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1846 1847 size_t freed_bytes() { return _freed_bytes; } 1848 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1849 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1850 1851 bool doHeapRegion(HeapRegion *hr) { 1852 if (hr->continuesHumongous()) { 1853 return false; 1854 } 1855 // We use a claim value of zero here because all regions 1856 // were claimed with value 1 in the FinalCount task. 
1857 _g1->reset_gc_time_stamps(hr); 1858 double start = os::elapsedTime(); 1859 _regions_claimed++; 1860 hr->note_end_of_marking(); 1861 _max_live_bytes += hr->max_live_bytes(); 1862 1863 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1864 _freed_bytes += hr->used(); 1865 hr->set_containing_set(NULL); 1866 if (hr->isHumongous()) { 1867 assert(hr->startsHumongous(), "we should only see starts humongous"); 1868 _humongous_regions_removed.increment(1u, hr->capacity()); 1869 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1870 } else { 1871 _old_regions_removed.increment(1u, hr->capacity()); 1872 _g1->free_region(hr, _local_cleanup_list, true); 1873 } 1874 } else { 1875 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1876 } 1877 1878 double region_time = (os::elapsedTime() - start); 1879 _claimed_region_time += region_time; 1880 if (region_time > _max_region_time) { 1881 _max_region_time = region_time; 1882 } 1883 return false; 1884 } 1885 1886 size_t max_live_bytes() { return _max_live_bytes; } 1887 uint regions_claimed() { return _regions_claimed; } 1888 double claimed_region_time_sec() { return _claimed_region_time; } 1889 double max_region_time_sec() { return _max_region_time; } 1890 }; 1891 1892 class G1ParNoteEndTask: public AbstractGangTask { 1893 friend class G1NoteEndOfConcMarkClosure; 1894 1895 protected: 1896 G1CollectedHeap* _g1h; 1897 size_t _max_live_bytes; 1898 size_t _freed_bytes; 1899 FreeRegionList* _cleanup_list; 1900 1901 public: 1902 G1ParNoteEndTask(G1CollectedHeap* g1h, 1903 FreeRegionList* cleanup_list) : 1904 AbstractGangTask("G1 note end"), _g1h(g1h), 1905 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1906 1907 void work(uint worker_id) { 1908 double start = os::elapsedTime(); 1909 FreeRegionList local_cleanup_list("Local Cleanup List"); 1910 HRRSCleanupTask hrrs_cleanup_task; 1911 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1912 &hrrs_cleanup_task); 1913 if (G1CollectedHeap::use_parallel_gc_threads()) { 1914 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1915 _g1h->workers()->active_workers(), 1916 HeapRegion::NoteEndClaimValue); 1917 } else { 1918 _g1h->heap_region_iterate(&g1_note_end); 1919 } 1920 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1921 1922 // Now update the lists 1923 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1924 { 1925 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1926 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1927 _max_live_bytes += g1_note_end.max_live_bytes(); 1928 _freed_bytes += g1_note_end.freed_bytes(); 1929 1930 // If we iterate over the global cleanup list at the end of 1931 // cleanup to do this printing we will not guarantee to only 1932 // generate output for the newly-reclaimed regions (the list 1933 // might not be empty at the beginning of cleanup; we might 1934 // still be working on its previous contents). So we do the 1935 // printing here, before we append the new regions to the global 1936 // cleanup list. 
1937 1938 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1939 if (hr_printer->is_active()) { 1940 FreeRegionListIterator iter(&local_cleanup_list); 1941 while (iter.more_available()) { 1942 HeapRegion* hr = iter.get_next(); 1943 hr_printer->cleanup(hr); 1944 } 1945 } 1946 1947 _cleanup_list->add_ordered(&local_cleanup_list); 1948 assert(local_cleanup_list.is_empty(), "post-condition"); 1949 1950 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1951 } 1952 } 1953 size_t max_live_bytes() { return _max_live_bytes; } 1954 size_t freed_bytes() { return _freed_bytes; } 1955 }; 1956 1957 class G1ParScrubRemSetTask: public AbstractGangTask { 1958 protected: 1959 G1RemSet* _g1rs; 1960 BitMap* _region_bm; 1961 BitMap* _card_bm; 1962 public: 1963 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1964 BitMap* region_bm, BitMap* card_bm) : 1965 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1966 _region_bm(region_bm), _card_bm(card_bm) { } 1967 1968 void work(uint worker_id) { 1969 if (G1CollectedHeap::use_parallel_gc_threads()) { 1970 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1971 HeapRegion::ScrubRemSetClaimValue); 1972 } else { 1973 _g1rs->scrub(_region_bm, _card_bm); 1974 } 1975 } 1976 1977 }; 1978 1979 void ConcurrentMark::cleanup() { 1980 // world is stopped at this checkpoint 1981 assert(SafepointSynchronize::is_at_safepoint(), 1982 "world should be stopped"); 1983 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1984 1985 // If a full collection has happened, we shouldn't do this. 1986 if (has_aborted()) { 1987 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1988 return; 1989 } 1990 1991 g1h->verify_region_sets_optional(); 1992 1993 if (VerifyDuringGC) { 1994 HandleMark hm; // handle scope 1995 Universe::heap()->prepare_for_verify(); 1996 Universe::verify(VerifyOption_G1UsePrevMarking, 1997 " VerifyDuringGC:(before)"); 1998 } 1999 g1h->check_bitmaps("Cleanup Start"); 2000 2001 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2002 g1p->record_concurrent_mark_cleanup_start(); 2003 2004 double start = os::elapsedTime(); 2005 2006 HeapRegionRemSet::reset_for_cleanup_tasks(); 2007 2008 uint n_workers; 2009 2010 // Do counting once more with the world stopped for good measure. 2011 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2012 2013 if (G1CollectedHeap::use_parallel_gc_threads()) { 2014 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2015 "sanity check"); 2016 2017 g1h->set_par_threads(); 2018 n_workers = g1h->n_par_threads(); 2019 assert(g1h->n_par_threads() == n_workers, 2020 "Should not have been reset"); 2021 g1h->workers()->run_task(&g1_par_count_task); 2022 // Done with the parallel phase so reset to 0. 2023 g1h->set_par_threads(0); 2024 2025 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2026 "sanity check"); 2027 } else { 2028 n_workers = 1; 2029 g1_par_count_task.work(0); 2030 } 2031 2032 if (VerifyDuringGC) { 2033 // Verify that the counting data accumulated during marking matches 2034 // that calculated by walking the marking bitmap. 
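// Failures are recorded whenever the freshly calculated (expected)
// data shows liveness that the accumulated (actual) data misses; see
// VerifyLiveObjectDataHRClosure above.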
2035
2036 // Bitmaps to hold expected values
2037 BitMap expected_region_bm(_region_bm.size(), true);
2038 BitMap expected_card_bm(_card_bm.size(), true);
2039
2040 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2041 &_region_bm,
2042 &_card_bm,
2043 &expected_region_bm,
2044 &expected_card_bm);
2045
2046 if (G1CollectedHeap::use_parallel_gc_threads()) {
2047 g1h->set_par_threads((int)n_workers);
2048 g1h->workers()->run_task(&g1_par_verify_task);
2049 // Done with the parallel phase so reset to 0.
2050 g1h->set_par_threads(0);
2051
2052 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2053 "sanity check");
2054 } else {
2055 g1_par_verify_task.work(0);
2056 }
2057
2058 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2059 }
2060
2061 size_t start_used_bytes = g1h->used();
2062 g1h->set_marking_complete();
2063
2064 double count_end = os::elapsedTime();
2065 double this_final_counting_time = (count_end - start);
2066 _total_counting_time += this_final_counting_time;
2067
2068 if (G1PrintRegionLivenessInfo) {
2069 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2070 _g1h->heap_region_iterate(&cl);
2071 }
2072
2073 // Install newly created mark bitmap as "prev".
2074 swapMarkBitMaps();
2075
2076 g1h->reset_gc_time_stamp();
2077
2078 // Note end of marking in all heap regions.
2079 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2080 if (G1CollectedHeap::use_parallel_gc_threads()) {
2081 g1h->set_par_threads((int)n_workers);
2082 g1h->workers()->run_task(&g1_par_note_end_task);
2083 g1h->set_par_threads(0);
2084
2085 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2086 "sanity check");
2087 } else {
2088 g1_par_note_end_task.work(0);
2089 }
2090 g1h->check_gc_time_stamps();
2091
2092 if (!cleanup_list_is_empty()) {
2093 // The cleanup list is not empty, so we'll have to process it
2094 // concurrently. Notify anyone else that might be wanting free
2095 // regions that there will be more free regions coming soon.
2096 g1h->set_free_regions_coming();
2097 }
2098
2099 // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
2100 // call below, since it affects the metric by which we sort the heap regions.
2101
2102 if (G1ScrubRemSets) {
2103 double rs_scrub_start = os::elapsedTime();
2104 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2105 if (G1CollectedHeap::use_parallel_gc_threads()) {
2106 g1h->set_par_threads((int)n_workers);
2107 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2108 g1h->set_par_threads(0);
2109
2110 assert(g1h->check_heap_region_claim_values(
2111 HeapRegion::ScrubRemSetClaimValue),
2112 "sanity check");
2113 } else {
2114 g1_par_scrub_rs_task.work(0);
2115 }
2116
2117 double rs_scrub_end = os::elapsedTime();
2118 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2119 _total_rs_scrub_time += this_rs_scrub_time;
2120 }
2121
2122 // This will also free any regions totally full of garbage objects,
2123 // and sort the regions.
2124 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2125
2126 // Statistics.
2127 double end = os::elapsedTime();
2128 _cleanup_times.add((end - start) * 1000.0);
2129
2130 if (G1Log::fine()) {
2131 g1h->print_size_transition(gclog_or_tty,
2132 start_used_bytes,
2133 g1h->used(),
2134 g1h->capacity());
2135 }
2136
2137 // Clean up will have freed any regions completely full of garbage.
2138 // Update the soft reference policy with the new heap occupancy.
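// (As a rough sketch of why this matters: update_heap_info_at_gc()
// refreshes the heap occupancy figures that the soft reference clearing
// policy consults, so it should run after the regions have been freed.)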
2138 Universe::update_heap_info_at_gc();
2139
2140 // We need to make this be a "collection" so any collection pause that
2141 // races with it goes around and waits for completeCleanup to finish.
2142 g1h->increment_total_collections();
2143
2144 // We reclaimed old regions so we should calculate the sizes to make
2145 // sure we update the old gen/space data.
2146 g1h->g1mm()->update_sizes();
2147
2148 if (VerifyDuringGC) {
2149 HandleMark hm; // handle scope
2150 Universe::heap()->prepare_for_verify();
2151 Universe::verify(VerifyOption_G1UsePrevMarking,
2152 " VerifyDuringGC:(after)");
2153 }
2154 g1h->check_bitmaps("Cleanup End");
2155
2156 g1h->verify_region_sets_optional();
2157 g1h->trace_heap_after_concurrent_cycle();
2158 }
2159
2160 void ConcurrentMark::completeCleanup() {
2161 if (has_aborted()) return;
2162
2163 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2164
2165 _cleanup_list.verify_optional();
2166 FreeRegionList tmp_free_list("Tmp Free List");
2167
2168 if (G1ConcRegionFreeingVerbose) {
2169 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2170 "cleanup list has %u entries",
2171 _cleanup_list.length());
2172 }
2173
2174 // No one else should be accessing the _cleanup_list at this point,
2175 // so it's not necessary to take any locks
2176 while (!_cleanup_list.is_empty()) {
2177 HeapRegion* hr = _cleanup_list.remove_head();
2178 assert(hr != NULL, "Got NULL from a non-empty list");
2179 hr->par_clear();
2180 tmp_free_list.add_ordered(hr);
2181
2182 // Instead of adding one region at a time to the secondary_free_list,
2183 // we accumulate them in the local list and move them a few at a
2184 // time. This also cuts down on the number of notify_all() calls
2185 // we do during this process. We'll also append the local list when
2186 // _cleanup_list is empty (which means we just removed the last
2187 // region from the _cleanup_list).
2188 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2189 _cleanup_list.is_empty()) {
2190 if (G1ConcRegionFreeingVerbose) {
2191 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2192 "appending %u entries to the secondary_free_list, "
2193 "cleanup list still has %u entries",
2194 tmp_free_list.length(),
2195 _cleanup_list.length());
2196 }
2197
2198 {
2199 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2200 g1h->secondary_free_list_add(&tmp_free_list);
2201 SecondaryFreeList_lock->notify_all();
2202 }
2203
2204 if (G1StressConcRegionFreeing) {
2205 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2206 os::sleep(Thread::current(), (jlong) 1, false);
2207 }
2208 }
2209 }
2210 }
2211 assert(tmp_free_list.is_empty(), "post-condition");
2212 }
2213
2214 // Supporting Object and Oop closures for reference discovery
2215 // and processing during marking
2216
2217 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2218 HeapWord* addr = (HeapWord*)obj;
2219 return addr != NULL &&
2220 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2221 }
2222
2223 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2224 // Uses the CMTask associated with a worker thread (for serial reference
2225 // processing the CMTask for worker 0 is used) to preserve (mark) and
2226 // trace referent objects.
2227 //
2228 // Using the CMTask and embedded local queues avoids having the worker
2229 // threads operating on the global mark stack.
This reduces the risk 2230 // of overflowing the stack - which we would rather avoid at this late 2231 // state. Also using the tasks' local queues removes the potential 2232 // of the workers interfering with each other that could occur if 2233 // operating on the global stack. 2234 2235 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2236 ConcurrentMark* _cm; 2237 CMTask* _task; 2238 int _ref_counter_limit; 2239 int _ref_counter; 2240 bool _is_serial; 2241 public: 2242 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2243 _cm(cm), _task(task), _is_serial(is_serial), 2244 _ref_counter_limit(G1RefProcDrainInterval) { 2245 assert(_ref_counter_limit > 0, "sanity"); 2246 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2247 _ref_counter = _ref_counter_limit; 2248 } 2249 2250 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2251 virtual void do_oop( oop* p) { do_oop_work(p); } 2252 2253 template <class T> void do_oop_work(T* p) { 2254 if (!_cm->has_overflown()) { 2255 oop obj = oopDesc::load_decode_heap_oop(p); 2256 if (_cm->verbose_high()) { 2257 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2258 "*"PTR_FORMAT" = "PTR_FORMAT, 2259 _task->worker_id(), p, (void*) obj); 2260 } 2261 2262 _task->deal_with_reference(obj); 2263 _ref_counter--; 2264 2265 if (_ref_counter == 0) { 2266 // We have dealt with _ref_counter_limit references, pushing them 2267 // and objects reachable from them on to the local stack (and 2268 // possibly the global stack). Call CMTask::do_marking_step() to 2269 // process these entries. 2270 // 2271 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2272 // there's nothing more to do (i.e. we're done with the entries that 2273 // were pushed as a result of the CMTask::deal_with_reference() calls 2274 // above) or we overflow. 2275 // 2276 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2277 // flag while there may still be some work to do. (See the comment at 2278 // the beginning of CMTask::do_marking_step() for those conditions - 2279 // one of which is reaching the specified time target.) It is only 2280 // when CMTask::do_marking_step() returns without setting the 2281 // has_aborted() flag that the marking step has completed. 2282 do { 2283 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2284 _task->do_marking_step(mark_step_duration_ms, 2285 false /* do_termination */, 2286 _is_serial); 2287 } while (_task->has_aborted() && !_cm->has_overflown()); 2288 _ref_counter = _ref_counter_limit; 2289 } 2290 } else { 2291 if (_cm->verbose_high()) { 2292 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2293 } 2294 } 2295 } 2296 }; 2297 2298 // 'Drain' oop closure used by both serial and parallel reference processing. 2299 // Uses the CMTask associated with a given worker thread (for serial 2300 // reference processing the CMtask for worker 0 is used). Calls the 2301 // do_marking_step routine, with an unbelievably large timeout value, 2302 // to drain the marking data structures of the remaining entries 2303 // added by the 'keep alive' oop closure above. 
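// (See ConcurrentMark::weakRefsWork() below for how instances of this
// closure and of G1CMKeepAliveAndDrainClosure above are handed to
// ReferenceProcessor::process_discovered_references().)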
2304
2305 class G1CMDrainMarkingStackClosure: public VoidClosure {
2306 ConcurrentMark* _cm;
2307 CMTask* _task;
2308 bool _is_serial;
2309 public:
2310 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2311 _cm(cm), _task(task), _is_serial(is_serial) {
2312 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2313 }
2314
2315 void do_void() {
2316 do {
2317 if (_cm->verbose_high()) {
2318 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2319 _task->worker_id(), BOOL_TO_STR(_is_serial));
2320 }
2321
2322 // We call CMTask::do_marking_step() to completely drain the local
2323 // and global marking stacks of entries pushed by the 'keep alive'
2324 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2325 //
2326 // CMTask::do_marking_step() is called in a loop, which we'll exit
2327 // if there's nothing more to do (i.e. we've completely drained the
2328 // entries that were pushed as a result of applying the 'keep alive'
2329 // closure to the entries on the discovered ref lists) or we overflow
2330 // the global marking stack.
2331 //
2332 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2333 // flag while there may still be some work to do. (See the comment at
2334 // the beginning of CMTask::do_marking_step() for those conditions -
2335 // one of which is reaching the specified time target.) It is only
2336 // when CMTask::do_marking_step() returns without setting the
2337 // has_aborted() flag that the marking step has completed.
2338
2339 _task->do_marking_step(1000000000.0 /* something very large */,
2340 true /* do_termination */,
2341 _is_serial);
2342 } while (_task->has_aborted() && !_cm->has_overflown());
2343 }
2344 };
2345
2346 // Implementation of AbstractRefProcTaskExecutor for parallel
2347 // reference processing at the end of G1 concurrent marking
2348
2349 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2350 private:
2351 G1CollectedHeap* _g1h;
2352 ConcurrentMark* _cm;
2353 WorkGang* _workers;
2354 int _active_workers;
2355
2356 public:
2357 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2358 ConcurrentMark* cm,
2359 WorkGang* workers,
2360 int n_workers) :
2361 _g1h(g1h), _cm(cm),
2362 _workers(workers), _active_workers(n_workers) { }
2363
2364 // Executes the given task using concurrent marking worker threads.
2365 virtual void execute(ProcessTask& task);
2366 virtual void execute(EnqueueTask& task);
2367 };
2368
2369 class G1CMRefProcTaskProxy: public AbstractGangTask {
2370 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2371 ProcessTask& _proc_task;
2372 G1CollectedHeap* _g1h;
2373 ConcurrentMark* _cm;
2374
2375 public:
2376 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2377 G1CollectedHeap* g1h,
2378 ConcurrentMark* cm) :
2379 AbstractGangTask("Process reference objects in parallel"),
2380 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2381 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2382 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2383 }
2384
2385 virtual void work(uint worker_id) {
2386 CMTask* task = _cm->task(worker_id);
2387 G1CMIsAliveClosure g1_is_alive(_g1h);
2388 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2389 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2390
2391 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2392 }
2393 };
2394
2395 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2396 assert(_workers != NULL, "Need parallel worker threads.");
2397 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2398
2399 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2400
2401 // We need to reset the concurrency level before each
2402 // proxy task execution, so that the termination protocol
2403 // and overflow handling in CMTask::do_marking_step() know
2404 // how many workers to wait for.
2405 _cm->set_concurrency(_active_workers);
2406 _g1h->set_par_threads(_active_workers);
2407 _workers->run_task(&proc_task_proxy);
2408 _g1h->set_par_threads(0);
2409 }
2410
2411 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2412 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2413 EnqueueTask& _enq_task;
2414
2415 public:
2416 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2417 AbstractGangTask("Enqueue reference objects in parallel"),
2418 _enq_task(enq_task) { }
2419
2420 virtual void work(uint worker_id) {
2421 _enq_task.work(worker_id);
2422 }
2423 };
2424
2425 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2426 assert(_workers != NULL, "Need parallel worker threads.");
2427 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2428
2429 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2430
2431 // Not strictly necessary but...
2432 //
2433 // We need to reset the concurrency level before each
2434 // proxy task execution, so that the termination protocol
2435 // and overflow handling in CMTask::do_marking_step() know
2436 // how many workers to wait for.
2437 _cm->set_concurrency(_active_workers);
2438 _g1h->set_par_threads(_active_workers);
2439 _workers->run_task(&enq_task_proxy);
2440 _g1h->set_par_threads(0);
2441 }
2442
2443 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2444 if (has_overflown()) {
2445 // Skip processing the discovered references if we have
2446 // overflown the global marking stack. Reference objects
2447 // only get discovered once so it is OK to not
2448 // de-populate the discovered reference lists. We could have,
2449 // but the only benefit would be that, when marking restarts,
2450 // fewer reference objects are discovered.
2451 return;
2452 }
2453
2454 ResourceMark rm;
2455 HandleMark hm;
2456
2457 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2458
2459 // Is alive closure.
2460 G1CMIsAliveClosure g1_is_alive(g1h);
2461
2462 // Inner scope to exclude the cleaning of the string and symbol
2463 // tables from the displayed time.
2464 {
2465 if (G1Log::finer()) {
2466 gclog_or_tty->put(' ');
2467 }
2468 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2469
2470 ReferenceProcessor* rp = g1h->ref_processor_cm();
2471
2472 // See the comment in G1CollectedHeap::ref_processing_init()
2473 // about how reference processing currently works in G1.
2474
2475 // Set the soft reference policy
2476 rp->setup_policy(clear_all_soft_refs);
2477 assert(_markStack.isEmpty(), "mark stack should be empty");
2478
2479 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2480 // in serial reference processing. Note these closures are also
2481 // used for serially processing (by the current thread) the
2482 // JNI references during parallel reference processing.
2483 //
2484 // These closures do not need to synchronize with the worker
2485 // threads involved in parallel reference processing as these
2486 // instances are executed serially by the current thread (e.g.
2487 // reference processing is not multi-threaded and is thus
2488 // performed by the current thread instead of a gang worker).
2489 //
2490 // The gang tasks involved in parallel reference processing create
2491 // their own instances of these closures, which do their own
2492 // synchronization among themselves.
2493 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2494 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2495
2496 // We need at least one active thread. If reference processing
2497 // is not multi-threaded we use the current (VMThread) thread,
2498 // otherwise we use the work gang from the G1CollectedHeap and
2499 // we utilize all the worker threads we can.
2500 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2501 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2502 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2503
2504 // Parallel processing task executor.
2505 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2506 g1h->workers(), active_workers);
2507 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2508
2509 // Set the concurrency level. The phase was already set prior to
2510 // executing the remark task.
2511 set_concurrency(active_workers);
2512
2513 // Set the degree of MT processing here. If the discovery was done MT,
2514 // the number of threads involved during discovery could differ from
2515 // the number of active workers. This is OK as long as the discovered
2516 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2517 rp->set_active_mt_degree(active_workers);
2518
2519 // Process the weak references.
2520 const ReferenceProcessorStats& stats =
2521 rp->process_discovered_references(&g1_is_alive,
2522 &g1_keep_alive,
2523 &g1_drain_mark_stack,
2524 executor,
2525 g1h->gc_timer_cm());
2526 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2527
2528 // The do_oop work routines of the keep_alive and drain_marking_stack
2529 // oop closures will set the has_overflown flag if we overflow the
2530 // global marking stack.
2531
2532 assert(_markStack.overflow() || _markStack.isEmpty(),
2533 "mark stack should be empty (unless it overflowed)");
2534
2535 if (_markStack.overflow()) {
2536 // This should have been done already when we tried to push an
2537 // entry on to the global mark stack. But let's do it again.
2538 set_has_overflown();
2539 }
2540
2541 assert(rp->num_q() == active_workers, "why not");
2542
2543 rp->enqueue_discovered_references(executor);
2544
2545 rp->verify_no_references_recorded();
2546 assert(!rp->discovery_enabled(), "Post condition");
2547 }
2548
2549 if (has_overflown()) {
2550 // We cannot trust g1_is_alive if the marking stack overflowed
2551 return;
2552 }
2553
2554 g1h->unlink_string_and_symbol_table(&g1_is_alive,
2555 /* process_strings */ false, // currently strings are always roots
2556 /* process_symbols */ true);
2557 }
2558
2559 void ConcurrentMark::swapMarkBitMaps() {
2560 CMBitMapRO* temp = _prevMarkBitMap;
2561 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2562 _nextMarkBitMap = (CMBitMap*) temp;
2563 }
2564
2565 class CMRemarkTask: public AbstractGangTask {
2566 private:
2567 ConcurrentMark* _cm;
2568 bool _is_serial;
2569 public:
2570 void work(uint worker_id) {
2571 // Since all available tasks are actually started, we should
2572 // only proceed if we're supposed to be active.
2573 if (worker_id < _cm->active_tasks()) {
2574 CMTask* task = _cm->task(worker_id);
2575 task->record_start_time();
2576 do {
2577 task->do_marking_step(1000000000.0 /* something very large */,
2578 true /* do_termination */,
2579 _is_serial);
2580 } while (task->has_aborted() && !_cm->has_overflown());
2581 // If we overflow, then we do not want to restart. We instead
2582 // want to abort remark and do concurrent marking again.
2583 task->record_end_time();
2584 }
2585 }
2586
2587 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2588 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2589 _cm->terminator()->reset_for_reuse(active_workers);
2590 }
2591 };
2592
2593 void ConcurrentMark::checkpointRootsFinalWork() {
2594 ResourceMark rm;
2595 HandleMark hm;
2596 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2597
2598 g1h->ensure_parsability(false);
2599
2600 if (G1CollectedHeap::use_parallel_gc_threads()) {
2601 G1CollectedHeap::StrongRootsScope srs(g1h);
2602 // this is remark, so we'll use up all active threads
2603 uint active_workers = g1h->workers()->active_workers();
2604 if (active_workers == 0) {
2605 assert(active_workers > 0, "Should have been set earlier");
2606 active_workers = (uint) ParallelGCThreads;
2607 g1h->workers()->set_active_workers(active_workers);
2608 }
2609 set_concurrency_and_phase(active_workers, false /* concurrent */);
2610 // Leave _parallel_marking_threads at its
2611 // value originally calculated in the ConcurrentMark
2612 // constructor and pass values of the active workers
2613 // through the gang in the task.
2614
2615 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2616 // We will start all available threads, even if we decide that the
2617 // active_workers will be fewer. The extra ones will just bail out
2618 // immediately.
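// (CMRemarkTask::work() checks worker_id against active_tasks() and
// simply returns for any surplus worker.)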
2619 g1h->set_par_threads(active_workers); 2620 g1h->workers()->run_task(&remarkTask); 2621 g1h->set_par_threads(0); 2622 } else { 2623 G1CollectedHeap::StrongRootsScope srs(g1h); 2624 uint active_workers = 1; 2625 set_concurrency_and_phase(active_workers, false /* concurrent */); 2626 2627 // Note - if there's no work gang then the VMThread will be 2628 // the thread to execute the remark - serially. We have 2629 // to pass true for the is_serial parameter so that 2630 // CMTask::do_marking_step() doesn't enter the sync 2631 // barriers in the event of an overflow. Doing so will 2632 // cause an assert that the current thread is not a 2633 // concurrent GC thread. 2634 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2635 remarkTask.work(0); 2636 } 2637 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2638 guarantee(has_overflown() || 2639 satb_mq_set.completed_buffers_num() == 0, 2640 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2641 BOOL_TO_STR(has_overflown()), 2642 satb_mq_set.completed_buffers_num())); 2643 2644 print_stats(); 2645 } 2646 2647 #ifndef PRODUCT 2648 2649 class PrintReachableOopClosure: public OopClosure { 2650 private: 2651 G1CollectedHeap* _g1h; 2652 outputStream* _out; 2653 VerifyOption _vo; 2654 bool _all; 2655 2656 public: 2657 PrintReachableOopClosure(outputStream* out, 2658 VerifyOption vo, 2659 bool all) : 2660 _g1h(G1CollectedHeap::heap()), 2661 _out(out), _vo(vo), _all(all) { } 2662 2663 void do_oop(narrowOop* p) { do_oop_work(p); } 2664 void do_oop( oop* p) { do_oop_work(p); } 2665 2666 template <class T> void do_oop_work(T* p) { 2667 oop obj = oopDesc::load_decode_heap_oop(p); 2668 const char* str = NULL; 2669 const char* str2 = ""; 2670 2671 if (obj == NULL) { 2672 str = ""; 2673 } else if (!_g1h->is_in_g1_reserved(obj)) { 2674 str = " O"; 2675 } else { 2676 HeapRegion* hr = _g1h->heap_region_containing(obj); 2677 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2678 bool marked = _g1h->is_marked(obj, _vo); 2679 2680 if (over_tams) { 2681 str = " >"; 2682 if (marked) { 2683 str2 = " AND MARKED"; 2684 } 2685 } else if (marked) { 2686 str = " M"; 2687 } else { 2688 str = " NOT"; 2689 } 2690 } 2691 2692 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2693 p, (void*) obj, str, str2); 2694 } 2695 }; 2696 2697 class PrintReachableObjectClosure : public ObjectClosure { 2698 private: 2699 G1CollectedHeap* _g1h; 2700 outputStream* _out; 2701 VerifyOption _vo; 2702 bool _all; 2703 HeapRegion* _hr; 2704 2705 public: 2706 PrintReachableObjectClosure(outputStream* out, 2707 VerifyOption vo, 2708 bool all, 2709 HeapRegion* hr) : 2710 _g1h(G1CollectedHeap::heap()), 2711 _out(out), _vo(vo), _all(all), _hr(hr) { } 2712 2713 void do_object(oop o) { 2714 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2715 bool marked = _g1h->is_marked(o, _vo); 2716 bool print_it = _all || over_tams || marked; 2717 2718 if (print_it) { 2719 _out->print_cr(" "PTR_FORMAT"%s", 2720 (void *)o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2721 PrintReachableOopClosure oopCl(_out, _vo, _all); 2722 o->oop_iterate_no_header(&oopCl); 2723 } 2724 } 2725 }; 2726 2727 class PrintReachableRegionClosure : public HeapRegionClosure { 2728 private: 2729 G1CollectedHeap* _g1h; 2730 outputStream* _out; 2731 VerifyOption _vo; 2732 bool _all; 2733 2734 public: 2735 bool doHeapRegion(HeapRegion* hr) { 2736 HeapWord* b = hr->bottom(); 2737 HeapWord* e = hr->end(); 2738 HeapWord* t = hr->top(); 2739 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2740 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2741 "TAMS: "PTR_FORMAT, b, e, t, p); 2742 _out->cr(); 2743 2744 HeapWord* from = b; 2745 HeapWord* to = t; 2746 2747 if (to > from) { 2748 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2749 _out->cr(); 2750 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2751 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2752 _out->cr(); 2753 } 2754 2755 return false; 2756 } 2757 2758 PrintReachableRegionClosure(outputStream* out, 2759 VerifyOption vo, 2760 bool all) : 2761 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2762 }; 2763 2764 void ConcurrentMark::print_reachable(const char* str, 2765 VerifyOption vo, 2766 bool all) { 2767 gclog_or_tty->cr(); 2768 gclog_or_tty->print_cr("== Doing heap dump... "); 2769 2770 if (G1PrintReachableBaseFile == NULL) { 2771 gclog_or_tty->print_cr(" #### error: no base file defined"); 2772 return; 2773 } 2774 2775 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2776 (JVM_MAXPATHLEN - 1)) { 2777 gclog_or_tty->print_cr(" #### error: file name too long"); 2778 return; 2779 } 2780 2781 char file_name[JVM_MAXPATHLEN]; 2782 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2783 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2784 2785 fileStream fout(file_name); 2786 if (!fout.is_open()) { 2787 gclog_or_tty->print_cr(" #### error: could not open file"); 2788 return; 2789 } 2790 2791 outputStream* out = &fout; 2792 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2793 out->cr(); 2794 2795 out->print_cr("--- ITERATING OVER REGIONS"); 2796 out->cr(); 2797 PrintReachableRegionClosure rcl(out, vo, all); 2798 _g1h->heap_region_iterate(&rcl); 2799 out->cr(); 2800 2801 gclog_or_tty->print_cr(" done"); 2802 gclog_or_tty->flush(); 2803 } 2804 2805 #endif // PRODUCT 2806 2807 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2808 // Note we are overriding the read-only view of the prev map here, via 2809 // the cast. 2810 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2811 } 2812 2813 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2814 _nextMarkBitMap->clearRange(mr); 2815 } 2816 2817 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2818 clearRangePrevBitmap(mr); 2819 clearRangeNextBitmap(mr); 2820 } 2821 2822 HeapRegion* 2823 ConcurrentMark::claim_region(uint worker_id) { 2824 // "checkpoint" the finger 2825 HeapWord* finger = _finger; 2826 2827 // _heap_end will not change underneath our feet; it only changes at 2828 // yield points. 2829 while (finger < _heap_end) { 2830 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2831 2832 // Note on how this code handles humongous regions. In the 2833 // normal case the finger will reach the start of a "starts 2834 // humongous" (SH) region. 
Its end will either be the end of the
2835 // last "continues humongous" (CH) region in the sequence, or the
2836 // standard end of the SH region (if the SH is the only region in
2837 // the sequence). That way claim_region() will skip over the CH
2838 // regions. However, there is a subtle race between a CM thread
2839 // executing this method and a mutator thread doing a humongous
2840 // object allocation. The two are not mutually exclusive as the CM
2841 // thread does not need to hold the Heap_lock when it gets
2842 // here. So there is a chance that claim_region() will come across
2843 // a free region that's in the process of becoming a SH or a CH
2844 // region. In the former case, it will either
2845 // a) Miss the update to the region's end, in which case it will
2846 // visit every subsequent CH region, will find their bitmaps
2847 // empty, and do nothing, or
2848 // b) Will observe the update of the region's end (in which case
2849 // it will skip the subsequent CH regions).
2850 // If it comes across a region that suddenly becomes CH, the
2851 // scenario will be similar to b). So, the race between
2852 // claim_region() and a humongous object allocation might force us
2853 // to do a bit of unnecessary work (due to some unnecessary bitmap
2854 // iterations) but it should not introduce any correctness issues.
2855 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2856 HeapWord* bottom = curr_region->bottom();
2857 HeapWord* end = curr_region->end();
2858 HeapWord* limit = curr_region->next_top_at_mark_start();
2859
2860 if (verbose_low()) {
2861 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2862 "["PTR_FORMAT", "PTR_FORMAT"), "
2863 "limit = "PTR_FORMAT,
2864 worker_id, curr_region, bottom, end, limit);
2865 }
2866
2867 // Is the gap between reading the finger and doing the CAS too long?
2868 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2869 if (res == finger) {
2870 // we succeeded
2871
2872 // notice that _finger == end cannot be guaranteed here since
2873 // someone else might have moved the finger even further
2874 assert(_finger >= end, "the finger should have moved forward");
2875
2876 if (verbose_low()) {
2877 gclog_or_tty->print_cr("[%u] we were successful with region = "
2878 PTR_FORMAT, worker_id, curr_region);
2879 }
2880
2881 if (limit > bottom) {
2882 if (verbose_low()) {
2883 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2884 "returning it ", worker_id, curr_region);
2885 }
2886 return curr_region;
2887 } else {
2888 assert(limit == bottom,
2889 "the region limit should be at bottom");
2890 if (verbose_low()) {
2891 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2892 "returning NULL", worker_id, curr_region);
2893 }
2894 // we return NULL and the caller should try calling
2895 // claim_region() again.
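// (Note that the CAS above has already advanced the global finger past
// this empty region, so the retry will start from the next region.)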
2896 return NULL; 2897 } 2898 } else { 2899 assert(_finger > finger, "the finger should have moved forward"); 2900 if (verbose_low()) { 2901 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2902 "global finger = "PTR_FORMAT", " 2903 "our finger = "PTR_FORMAT, 2904 worker_id, _finger, finger); 2905 } 2906 2907 // read it again 2908 finger = _finger; 2909 } 2910 } 2911 2912 return NULL; 2913 } 2914 2915 #ifndef PRODUCT 2916 enum VerifyNoCSetOopsPhase { 2917 VerifyNoCSetOopsStack, 2918 VerifyNoCSetOopsQueues, 2919 VerifyNoCSetOopsSATBCompleted, 2920 VerifyNoCSetOopsSATBThread 2921 }; 2922 2923 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2924 private: 2925 G1CollectedHeap* _g1h; 2926 VerifyNoCSetOopsPhase _phase; 2927 int _info; 2928 2929 const char* phase_str() { 2930 switch (_phase) { 2931 case VerifyNoCSetOopsStack: return "Stack"; 2932 case VerifyNoCSetOopsQueues: return "Queue"; 2933 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2934 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2935 default: ShouldNotReachHere(); 2936 } 2937 return NULL; 2938 } 2939 2940 void do_object_work(oop obj) { 2941 guarantee(!_g1h->obj_in_cs(obj), 2942 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2943 (void*) obj, phase_str(), _info)); 2944 } 2945 2946 public: 2947 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2948 2949 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2950 _phase = phase; 2951 _info = info; 2952 } 2953 2954 virtual void do_oop(oop* p) { 2955 oop obj = oopDesc::load_decode_heap_oop(p); 2956 do_object_work(obj); 2957 } 2958 2959 virtual void do_oop(narrowOop* p) { 2960 // We should not come across narrow oops while scanning marking 2961 // stacks and SATB buffers. 2962 ShouldNotReachHere(); 2963 } 2964 2965 virtual void do_object(oop obj) { 2966 do_object_work(obj); 2967 } 2968 }; 2969 2970 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2971 bool verify_enqueued_buffers, 2972 bool verify_thread_buffers, 2973 bool verify_fingers) { 2974 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2975 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2976 return; 2977 } 2978 2979 VerifyNoCSetOopsClosure cl; 2980 2981 if (verify_stacks) { 2982 // Verify entries on the global mark stack 2983 cl.set_phase(VerifyNoCSetOopsStack); 2984 _markStack.oops_do(&cl); 2985 2986 // Verify entries on the task queues 2987 for (uint i = 0; i < _max_worker_id; i += 1) { 2988 cl.set_phase(VerifyNoCSetOopsQueues, i); 2989 CMTaskQueue* queue = _task_queues->queue(i); 2990 queue->oops_do(&cl); 2991 } 2992 } 2993 2994 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2995 2996 // Verify entries on the enqueued SATB buffers 2997 if (verify_enqueued_buffers) { 2998 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2999 satb_qs.iterate_completed_buffers_read_only(&cl); 3000 } 3001 3002 // Verify entries on the per-thread SATB buffers 3003 if (verify_thread_buffers) { 3004 cl.set_phase(VerifyNoCSetOopsSATBThread); 3005 satb_qs.iterate_thread_buffers_read_only(&cl); 3006 } 3007 3008 if (verify_fingers) { 3009 // Verify the global finger 3010 HeapWord* global_finger = finger(); 3011 if (global_finger != NULL && global_finger < _heap_end) { 3012 // The global finger always points to a heap region boundary. 
We 3013 // use heap_region_containing_raw() to get the containing region 3014 // given that the global finger could be pointing to a free region 3015 // which subsequently becomes continues humongous. If that 3016 // happens, heap_region_containing() will return the bottom of the 3017 // corresponding starts humongous region and the check below will 3018 // not hold any more. 3019 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3020 guarantee(global_finger == global_hr->bottom(), 3021 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3022 global_finger, HR_FORMAT_PARAMS(global_hr))); 3023 } 3024 3025 // Verify the task fingers 3026 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3027 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3028 CMTask* task = _tasks[i]; 3029 HeapWord* task_finger = task->finger(); 3030 if (task_finger != NULL && task_finger < _heap_end) { 3031 // See above note on the global finger verification. 3032 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3033 guarantee(task_finger == task_hr->bottom() || 3034 !task_hr->in_collection_set(), 3035 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3036 task_finger, HR_FORMAT_PARAMS(task_hr))); 3037 } 3038 } 3039 } 3040 } 3041 #endif // PRODUCT 3042 3043 // Aggregate the counting data that was constructed concurrently 3044 // with marking. 3045 class AggregateCountDataHRClosure: public HeapRegionClosure { 3046 G1CollectedHeap* _g1h; 3047 ConcurrentMark* _cm; 3048 CardTableModRefBS* _ct_bs; 3049 BitMap* _cm_card_bm; 3050 uint _max_worker_id; 3051 3052 public: 3053 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3054 BitMap* cm_card_bm, 3055 uint max_worker_id) : 3056 _g1h(g1h), _cm(g1h->concurrent_mark()), 3057 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3058 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3059 3060 bool doHeapRegion(HeapRegion* hr) { 3061 if (hr->continuesHumongous()) { 3062 // We will ignore these here and process them when their 3063 // associated "starts humongous" region is processed. 3064 // Note that we cannot rely on their associated 3065 // "starts humongous" region to have their bit set to 1 3066 // since, due to the region chunking in the parallel region 3067 // iteration, a "continues humongous" region might be visited 3068 // before its associated "starts humongous". 3069 return false; 3070 } 3071 3072 HeapWord* start = hr->bottom(); 3073 HeapWord* limit = hr->next_top_at_mark_start(); 3074 HeapWord* end = hr->end(); 3075 3076 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3077 err_msg("Preconditions not met - " 3078 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3079 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3080 start, limit, hr->top(), hr->end())); 3081 3082 assert(hr->next_marked_bytes() == 0, "Precondition"); 3083 3084 if (start == limit) { 3085 // NTAMS of this region has not been set so nothing to do. 3086 return false; 3087 } 3088 3089 // 'start' should be in the heap. 
3090 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3091 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3092 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3093
3094 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3095 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3096 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3097
3098 // If ntams is not card aligned then we bump the card bitmap index
3099 // for limit so that we get all the cards spanned by
3100 // the object ending at ntams.
3101 // Note: if this is the last region in the heap then ntams
3102 // could be actually just beyond the end of the heap;
3103 // limit_idx will then correspond to a (non-existent) card
3104 // that is also outside the heap.
3105 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3106 limit_idx += 1;
3107 }
3108
3109 assert(limit_idx <= end_idx, "or else use atomics");
3110
3111 // Aggregate the "stripe" in the count data associated with hr.
3112 uint hrs_index = hr->hrs_index();
3113 size_t marked_bytes = 0;
3114
3115 for (uint i = 0; i < _max_worker_id; i += 1) {
3116 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3117 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3118
3119 // Fetch the marked_bytes in this region for task i and
3120 // add it to the running total for this region.
3121 marked_bytes += marked_bytes_array[hrs_index];
3122
3123 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3124 // into the global card bitmap.
3125 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3126
3127 while (scan_idx < limit_idx) {
3128 assert(task_card_bm->at(scan_idx) == true, "should be");
3129 _cm_card_bm->set_bit(scan_idx);
3130 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3131
3132 // BitMap::get_next_one_offset() can handle the case when
3133 // its left_offset parameter is greater than its right_offset
3134 // parameter. It does, however, have an early exit if
3135 // left_offset == right_offset. So let's limit the value
3136 // passed in for left offset here.
3137 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3138 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3139 }
3140 }
3141
3142 // Update the marked bytes for this region.
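// (marked_bytes now holds the sum of the per-worker counts gathered in
// the loop above.)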
3143 hr->add_to_marked_bytes(marked_bytes); 3144 3145 // Next heap region 3146 return false; 3147 } 3148 }; 3149 3150 class G1AggregateCountDataTask: public AbstractGangTask { 3151 protected: 3152 G1CollectedHeap* _g1h; 3153 ConcurrentMark* _cm; 3154 BitMap* _cm_card_bm; 3155 uint _max_worker_id; 3156 int _active_workers; 3157 3158 public: 3159 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3160 ConcurrentMark* cm, 3161 BitMap* cm_card_bm, 3162 uint max_worker_id, 3163 int n_workers) : 3164 AbstractGangTask("Count Aggregation"), 3165 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3166 _max_worker_id(max_worker_id), 3167 _active_workers(n_workers) { } 3168 3169 void work(uint worker_id) { 3170 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3171 3172 if (G1CollectedHeap::use_parallel_gc_threads()) { 3173 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3174 _active_workers, 3175 HeapRegion::AggregateCountClaimValue); 3176 } else { 3177 _g1h->heap_region_iterate(&cl); 3178 } 3179 } 3180 }; 3181 3182 3183 void ConcurrentMark::aggregate_count_data() { 3184 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3185 _g1h->workers()->active_workers() : 3186 1); 3187 3188 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3189 _max_worker_id, n_workers); 3190 3191 if (G1CollectedHeap::use_parallel_gc_threads()) { 3192 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3193 "sanity check"); 3194 _g1h->set_par_threads(n_workers); 3195 _g1h->workers()->run_task(&g1_par_agg_task); 3196 _g1h->set_par_threads(0); 3197 3198 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3199 "sanity check"); 3200 _g1h->reset_heap_region_claim_values(); 3201 } else { 3202 g1_par_agg_task.work(0); 3203 } 3204 } 3205 3206 // Clear the per-worker arrays used to store the per-region counting data 3207 void ConcurrentMark::clear_all_count_data() { 3208 // Clear the global card bitmap - it will be filled during 3209 // liveness count aggregation (during remark) and the 3210 // final counting task. 3211 _card_bm.clear(); 3212 3213 // Clear the global region bitmap - it will be filled as part 3214 // of the final counting task. 3215 _region_bm.clear(); 3216 3217 uint max_regions = _g1h->max_regions(); 3218 assert(_max_worker_id > 0, "uninitialized"); 3219 3220 for (uint i = 0; i < _max_worker_id; i += 1) { 3221 BitMap* task_card_bm = count_card_bitmap_for(i); 3222 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3223 3224 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3225 assert(marked_bytes_array != NULL, "uninitialized"); 3226 3227 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3228 task_card_bm->clear(); 3229 } 3230 } 3231 3232 void ConcurrentMark::print_stats() { 3233 if (verbose_stats()) { 3234 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3235 for (size_t i = 0; i < _active_tasks; ++i) { 3236 _tasks[i]->print_stats(); 3237 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3238 } 3239 } 3240 } 3241 3242 // abandon current marking iteration due to a Full GC 3243 void ConcurrentMark::abort() { 3244 // Clear all marks to force marking thread to do nothing 3245 _nextMarkBitMap->clearAll(); 3246 3247 // Note we cannot clear the previous marking bitmap here 3248 // since VerifyDuringGC verifies the objects marked during 3249 // a full GC against the previous bitmap. 
3250 3251 // Clear the liveness counting data 3252 clear_all_count_data(); 3253 // Empty mark stack 3254 reset_marking_state(); 3255 for (uint i = 0; i < _max_worker_id; ++i) { 3256 _tasks[i]->clear_region_fields(); 3257 } 3258 _first_overflow_barrier_sync.abort(); 3259 _second_overflow_barrier_sync.abort(); 3260 _has_aborted = true; 3261 3262 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3263 satb_mq_set.abandon_partial_marking(); 3264 // This can be called either during or outside marking, we'll read 3265 // the expected_active value from the SATB queue set. 3266 satb_mq_set.set_active_all_threads( 3267 false, /* new active value */ 3268 satb_mq_set.is_active() /* expected_active */); 3269 3270 _g1h->trace_heap_after_concurrent_cycle(); 3271 _g1h->register_concurrent_cycle_end(); 3272 } 3273 3274 static void print_ms_time_info(const char* prefix, const char* name, 3275 NumberSeq& ns) { 3276 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3277 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3278 if (ns.num() > 0) { 3279 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3280 prefix, ns.sd(), ns.maximum()); 3281 } 3282 } 3283 3284 void ConcurrentMark::print_summary_info() { 3285 gclog_or_tty->print_cr(" Concurrent marking:"); 3286 print_ms_time_info(" ", "init marks", _init_times); 3287 print_ms_time_info(" ", "remarks", _remark_times); 3288 { 3289 print_ms_time_info(" ", "final marks", _remark_mark_times); 3290 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3291 3292 } 3293 print_ms_time_info(" ", "cleanups", _cleanup_times); 3294 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3295 _total_counting_time, 3296 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3297 (double)_cleanup_times.num() 3298 : 0.0)); 3299 if (G1ScrubRemSets) { 3300 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3301 _total_rs_scrub_time, 3302 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3303 (double)_cleanup_times.num() 3304 : 0.0)); 3305 } 3306 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3307 (_init_times.sum() + _remark_times.sum() + 3308 _cleanup_times.sum())/1000.0); 3309 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3310 "(%8.2f s marking).", 3311 cmThread()->vtime_accum(), 3312 cmThread()->vtime_mark_accum()); 3313 } 3314 3315 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3316 if (use_parallel_marking_threads()) { 3317 _parallel_workers->print_worker_threads_on(st); 3318 } 3319 } 3320 3321 void ConcurrentMark::print_on_error(outputStream* st) const { 3322 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3323 _prevMarkBitMap, _nextMarkBitMap); 3324 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3325 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3326 } 3327 3328 // We take a break if someone is trying to stop the world. 
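// Returns true if we actually yielded.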
3329 bool ConcurrentMark::do_yield_check(uint worker_id) { 3330 if (SuspendibleThreadSet::should_yield()) { 3331 if (worker_id == 0) { 3332 _g1h->g1_policy()->record_concurrent_pause(); 3333 } 3334 SuspendibleThreadSet::yield(); 3335 return true; 3336 } else { 3337 return false; 3338 } 3339 } 3340 3341 bool ConcurrentMark::containing_card_is_marked(void* p) { 3342 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3343 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3344 } 3345 3346 bool ConcurrentMark::containing_cards_are_marked(void* start, 3347 void* last) { 3348 return containing_card_is_marked(start) && 3349 containing_card_is_marked(last); 3350 } 3351 3352 #ifndef PRODUCT 3353 // for debugging purposes 3354 void ConcurrentMark::print_finger() { 3355 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3356 _heap_start, _heap_end, _finger); 3357 for (uint i = 0; i < _max_worker_id; ++i) { 3358 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger()); 3359 } 3360 gclog_or_tty->print_cr(""); 3361 } 3362 #endif 3363 3364 void CMTask::scan_object(oop obj) { 3365 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3366 3367 if (_cm->verbose_high()) { 3368 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3369 _worker_id, (void*) obj); 3370 } 3371 3372 size_t obj_size = obj->size(); 3373 _words_scanned += obj_size; 3374 3375 obj->oop_iterate(_cm_oop_closure); 3376 statsOnly( ++_objs_scanned ); 3377 check_limits(); 3378 } 3379 3380 // Closure for iteration over bitmaps 3381 class CMBitMapClosure : public BitMapClosure { 3382 private: 3383 // the bitmap that is being iterated over 3384 CMBitMap* _nextMarkBitMap; 3385 ConcurrentMark* _cm; 3386 CMTask* _task; 3387 3388 public: 3389 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3390 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3391 3392 bool do_bit(size_t offset) { 3393 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3394 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3395 assert( addr < _cm->finger(), "invariant"); 3396 3397 statsOnly( _task->increase_objs_found_on_bitmap() ); 3398 assert(addr >= _task->finger(), "invariant"); 3399 3400 // We move that task's local finger along. 3401 _task->move_finger_to(addr); 3402 3403 _task->scan_object(oop(addr)); 3404 // we only partially drain the local queue and global stack 3405 _task->drain_local_queue(true); 3406 _task->drain_global_stack(true); 3407 3408 // if the has_aborted flag has been raised, we need to bail out of 3409 // the iteration 3410 return !_task->has_aborted(); 3411 } 3412 }; 3413 3414 // Closure for iterating over objects, currently only used for 3415 // processing SATB buffers. 
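// The closure simply hands each object in a buffer over to the owning
// task via CMTask::deal_with_reference().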
3416 class CMObjectClosure : public ObjectClosure {
3417 private:
3418 CMTask* _task;
3419
3420 public:
3421 void do_object(oop obj) {
3422 _task->deal_with_reference(obj);
3423 }
3424
3425 CMObjectClosure(CMTask* task) : _task(task) { }
3426 };
3427
3428 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3429 ConcurrentMark* cm,
3430 CMTask* task)
3431 : _g1h(g1h), _cm(cm), _task(task) {
3432 assert(_ref_processor == NULL, "should be initialized to NULL");
3433
3434 if (G1UseConcMarkReferenceProcessing) {
3435 _ref_processor = g1h->ref_processor_cm();
3436 assert(_ref_processor != NULL, "should not be NULL");
3437 }
3438 }
3439
3440 void CMTask::setup_for_region(HeapRegion* hr) {
3441 assert(hr != NULL,
3442 "claim_region() should have filtered out NULL regions");
3443 assert(!hr->continuesHumongous(),
3444 "claim_region() should have filtered out continues humongous regions");
3445
3446 if (_cm->verbose_low()) {
3447 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3448 _worker_id, hr);
3449 }
3450
3451 _curr_region = hr;
3452 _finger = hr->bottom();
3453 update_region_limit();
3454 }
3455
3456 void CMTask::update_region_limit() {
3457 HeapRegion* hr = _curr_region;
3458 HeapWord* bottom = hr->bottom();
3459 HeapWord* limit = hr->next_top_at_mark_start();
3460
3461 if (limit == bottom) {
3462 if (_cm->verbose_low()) {
3463 gclog_or_tty->print_cr("[%u] found an empty region "
3464 "["PTR_FORMAT", "PTR_FORMAT")",
3465 _worker_id, bottom, limit);
3466 }
3467 // The region was collected underneath our feet.
3468 // We set the finger to bottom to ensure that the bitmap
3469 // iteration that will follow this will not do anything.
3470 // (this is not a condition that holds when we set the region up,
3471 // as the region is not supposed to be empty in the first place)
3472 _finger = bottom;
3473 } else if (limit >= _region_limit) {
3474 assert(limit >= _finger, "peace of mind");
3475 } else {
3476 assert(limit < _region_limit, "only way to get here");
3477 // This can happen under some pretty unusual circumstances. An
3478 // evacuation pause empties the region underneath our feet (NTAMS
3479 // at bottom). We then do some allocation in the region (NTAMS
3480 // stays at bottom), followed by the region being used as a GC
3481 // alloc region (NTAMS will move to top() and the objects
3482 // originally below it will be grayed). All objects now marked in
3483 // the region are explicitly grayed, if below the global finger,
3484 // and in fact we do not need to scan anything else. So, we simply
3485 // set _finger to be limit to ensure that the bitmap iteration
3486 // doesn't do anything.
3487 _finger = limit;
3488 }
3489
3490 _region_limit = limit;
3491 }
3492
3493 void CMTask::giveup_current_region() {
3494 assert(_curr_region != NULL, "invariant");
3495 if (_cm->verbose_low()) {
3496 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3497 _worker_id, _curr_region);
3498 }
3499 clear_region_fields();
3500 }
3501
3502 void CMTask::clear_region_fields() {
3503 // Values for these three fields that indicate that we're not
3504 // holding on to a region.
3505 _curr_region = NULL; 3506 _finger = NULL; 3507 _region_limit = NULL; 3508 } 3509 3510 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3511 if (cm_oop_closure == NULL) { 3512 assert(_cm_oop_closure != NULL, "invariant"); 3513 } else { 3514 assert(_cm_oop_closure == NULL, "invariant"); 3515 } 3516 _cm_oop_closure = cm_oop_closure; 3517 } 3518 3519 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3520 guarantee(nextMarkBitMap != NULL, "invariant"); 3521 3522 if (_cm->verbose_low()) { 3523 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3524 } 3525 3526 _nextMarkBitMap = nextMarkBitMap; 3527 clear_region_fields(); 3528 3529 _calls = 0; 3530 _elapsed_time_ms = 0.0; 3531 _termination_time_ms = 0.0; 3532 _termination_start_time_ms = 0.0; 3533 3534 #if _MARKING_STATS_ 3535 _local_pushes = 0; 3536 _local_pops = 0; 3537 _local_max_size = 0; 3538 _objs_scanned = 0; 3539 _global_pushes = 0; 3540 _global_pops = 0; 3541 _global_max_size = 0; 3542 _global_transfers_to = 0; 3543 _global_transfers_from = 0; 3544 _regions_claimed = 0; 3545 _objs_found_on_bitmap = 0; 3546 _satb_buffers_processed = 0; 3547 _steal_attempts = 0; 3548 _steals = 0; 3549 _aborted = 0; 3550 _aborted_overflow = 0; 3551 _aborted_cm_aborted = 0; 3552 _aborted_yield = 0; 3553 _aborted_timed_out = 0; 3554 _aborted_satb = 0; 3555 _aborted_termination = 0; 3556 #endif // _MARKING_STATS_ 3557 } 3558 3559 bool CMTask::should_exit_termination() { 3560 regular_clock_call(); 3561 // This is called when we are in the termination protocol. We should 3562 // quit if, for some reason, this task wants to abort or the global 3563 // stack is not empty (this means that we can get work from it). 3564 return !_cm->mark_stack_empty() || has_aborted(); 3565 } 3566 3567 void CMTask::reached_limit() { 3568 assert(_words_scanned >= _words_scanned_limit || 3569 _refs_reached >= _refs_reached_limit , 3570 "shouldn't have been called otherwise"); 3571 regular_clock_call(); 3572 } 3573 3574 void CMTask::regular_clock_call() { 3575 if (has_aborted()) return; 3576 3577 // First, we need to recalculate the words scanned and refs reached 3578 // limits for the next clock call. 3579 recalculate_limits(); 3580 3581 // During the regular clock call we do the following 3582 3583 // (1) If an overflow has been flagged, then we abort. 3584 if (_cm->has_overflown()) { 3585 set_has_aborted(); 3586 return; 3587 } 3588 3589 // If we are not concurrent (i.e. we're doing remark) we don't need 3590 // to check anything else. The other steps are only needed during 3591 // the concurrent marking phase. 3592 if (!concurrent()) return; 3593 3594 // (2) If marking has been aborted for Full GC, then we also abort. 3595 if (_cm->has_aborted()) { 3596 set_has_aborted(); 3597 statsOnly( ++_aborted_cm_aborted ); 3598 return; 3599 } 3600 3601 double curr_time_ms = os::elapsedVTime() * 1000.0; 3602 3603 // (3) If marking stats are enabled, then we update the step history. 
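// (The block below is compiled in only when _MARKING_STATS_ is
// defined, so this bookkeeping is compiled out otherwise.)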
3604 #if _MARKING_STATS_
3605 if (_words_scanned >= _words_scanned_limit) {
3606 ++_clock_due_to_scanning;
3607 }
3608 if (_refs_reached >= _refs_reached_limit) {
3609 ++_clock_due_to_marking;
3610 }
3611
3612 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3613 _interval_start_time_ms = curr_time_ms;
3614 _all_clock_intervals_ms.add(last_interval_ms);
3615
3616 if (_cm->verbose_medium()) {
3617 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3618 "scanned = %d%s, refs reached = %d%s",
3619 _worker_id, last_interval_ms,
3620 _words_scanned,
3621 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3622 _refs_reached,
3623 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3624 }
3625 #endif // _MARKING_STATS_
3626
3627 // (4) We check whether we should yield. If we have to, then we abort.
3628 if (SuspendibleThreadSet::should_yield()) {
3629 // We should yield. To do this we abort the task. The caller is
3630 // responsible for yielding.
3631 set_has_aborted();
3632 statsOnly( ++_aborted_yield );
3633 return;
3634 }
3635
3636 // (5) We check whether we've reached our time quota. If we have,
3637 // then we abort.
3638 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3639 if (elapsed_time_ms > _time_target_ms) {
3640 set_has_aborted();
3641 _has_timed_out = true;
3642 statsOnly( ++_aborted_timed_out );
3643 return;
3644 }
3645
3646 // (6) Finally, we check whether there are enough completed SATB
3647 // buffers available for processing. If there are, we abort.
3648 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3649 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3650 if (_cm->verbose_low()) {
3651 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3652 _worker_id);
3653 }
3654 // we do need to process SATB buffers, we'll abort and restart
3655 // the marking task to do so
3656 set_has_aborted();
3657 statsOnly( ++_aborted_satb );
3658 return;
3659 }
3660 }
3661
3662 void CMTask::recalculate_limits() {
3663 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3664 _words_scanned_limit = _real_words_scanned_limit;
3665
3666 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3667 _refs_reached_limit = _real_refs_reached_limit;
3668 }
3669
3670 void CMTask::decrease_limits() {
3671 // This is called when we believe that we're going to do an infrequent
3672 // operation which will increase the per-byte scanned cost (i.e. move
3673 // entries to/from the global stack). It basically tries to decrease the
3674 // scanning limit so that the clock is called earlier.
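// A small worked example (numbers are illustrative only): if
// words_scanned_period were 12*1024 words, the assignment below pulls
// _words_scanned_limit back by 9*1024 words (3/4 of the period),
// leaving roughly a quarter of the period's scanning budget before
// regular_clock_call() fires again.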
3675
3676 if (_cm->verbose_medium()) {
3677 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3678 }
3679
3680 _words_scanned_limit = _real_words_scanned_limit -
3681 3 * words_scanned_period / 4;
3682 _refs_reached_limit = _real_refs_reached_limit -
3683 3 * refs_reached_period / 4;
3684 }
3685
3686 void CMTask::move_entries_to_global_stack() {
3687 // local array where we'll store the entries that will be popped
3688 // from the local queue
3689 oop buffer[global_stack_transfer_size];
3690
3691 int n = 0;
3692 oop obj;
3693 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3694 buffer[n] = obj;
3695 ++n;
3696 }
3697
3698 if (n > 0) {
3699 // we popped at least one entry from the local queue
3700
3701 statsOnly( ++_global_transfers_to; _local_pops += n );
3702
3703 if (!_cm->mark_stack_push(buffer, n)) {
3704 if (_cm->verbose_low()) {
3705 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3706 _worker_id);
3707 }
3708 set_has_aborted();
3709 } else {
3710 // the transfer was successful
3711
3712 if (_cm->verbose_medium()) {
3713 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3714 _worker_id, n);
3715 }
3716 statsOnly( int tmp_size = _cm->mark_stack_size();
3717 if (tmp_size > _global_max_size) {
3718 _global_max_size = tmp_size;
3719 }
3720 _global_pushes += n );
3721 }
3722 }
3723
3724 // this operation was quite expensive, so decrease the limits
3725 decrease_limits();
3726 }
3727
3728 void CMTask::get_entries_from_global_stack() {
3729 // local array where we'll store the entries that will be popped
3730 // from the global stack.
3731 oop buffer[global_stack_transfer_size];
3732 int n;
3733 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3734 assert(n <= global_stack_transfer_size,
3735 "we should not pop more than the given limit");
3736 if (n > 0) {
3737 // yes, we did actually pop at least one entry
3738
3739 statsOnly( ++_global_transfers_from; _global_pops += n );
3740 if (_cm->verbose_medium()) {
3741 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3742 _worker_id, n);
3743 }
3744 for (int i = 0; i < n; ++i) {
3745 bool success = _task_queue->push(buffer[i]);
3746 // We only call this when the local queue is empty or under a
3747 // given target limit. So, we do not expect this push to fail.
3748 assert(success, "invariant");
3749 }
3750
3751 statsOnly( int tmp_size = _task_queue->size();
3752 if (tmp_size > _local_max_size) {
3753 _local_max_size = tmp_size;
3754 }
3755 _local_pushes += n );
3756 }
3757
3758 // this operation was quite expensive, so decrease the limits
3759 decrease_limits();
3760 }
3761
3762 void CMTask::drain_local_queue(bool partially) {
3763 if (has_aborted()) return;
3764
3765 // Decide what the target size is, depending on whether we're going to
3766 // drain it partially (so that other tasks can steal if they run out
3767 // of things to do) or totally (at the very end).
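// For example (values illustrative, not defaults): with
// max_elems() == 16384 and GCDrainStackTargetSize == 64, a partial
// drain stops once the queue is down to MIN2(16384 / 3, 64) == 64
// entries, while a total drain uses a target of 0.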
3768 size_t target_size;
3769 if (partially) {
3770 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3771 } else {
3772 target_size = 0;
3773 }
3774
3775 if (_task_queue->size() > target_size) {
3776 if (_cm->verbose_high()) {
3777 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3778 _worker_id, target_size);
3779 }
3780
3781 oop obj;
3782 bool ret = _task_queue->pop_local(obj);
3783 while (ret) {
3784 statsOnly( ++_local_pops );
3785
3786 if (_cm->verbose_high()) {
3787 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3788 (void*) obj);
3789 }
3790
3791 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3792 assert(!_g1h->is_on_master_free_list(
3793 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3794
3795 scan_object(obj);
3796
3797 if (_task_queue->size() <= target_size || has_aborted()) {
3798 ret = false;
3799 } else {
3800 ret = _task_queue->pop_local(obj);
3801 }
3802 }
3803
3804 if (_cm->verbose_high()) {
3805 gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3806 _worker_id, _task_queue->size());
3807 }
3808 }
3809 }
3810
3811 void CMTask::drain_global_stack(bool partially) {
3812 if (has_aborted()) return;
3813
3814 // We have a policy to drain the local queue before we attempt to
3815 // drain the global stack.
3816 assert(partially || _task_queue->size() == 0, "invariant");
3817
3818 // Decide what the target size is, depending on whether we're going to
3819 // drain it partially (so that other tasks can steal if they run out
3820 // of things to do) or totally (at the very end). Notice that,
3821 // because we move entries from the global stack in chunks or
3822 // because another task might be doing the same, we might in fact
3823 // drop below the target. But, this is not a problem.
3824 size_t target_size;
3825 if (partially) {
3826 target_size = _cm->partial_mark_stack_size_target();
3827 } else {
3828 target_size = 0;
3829 }
3830
3831 if (_cm->mark_stack_size() > target_size) {
3832 if (_cm->verbose_low()) {
3833 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3834 _worker_id, target_size);
3835 }
3836
3837 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3838 get_entries_from_global_stack();
3839 drain_local_queue(partially);
3840 }
3841
3842 if (_cm->verbose_low()) {
3843 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3844 _worker_id, _cm->mark_stack_size());
3845 }
3846 }
3847 }
3848
3849 // The SATB queue code makes several assumptions about whether to call the par or
3850 // non-par versions of the methods. This is why some of the code is
3851 // replicated. We should really get rid of the single-threaded version
3852 // of the code to simplify things.
3853 void CMTask::drain_satb_buffers() {
3854 if (has_aborted()) return;
3855
3856 // We set this so that the regular clock knows that we're in the
3857 // middle of draining buffers and doesn't set the abort flag when it
3858 // notices that SATB buffers are available for draining. It'd be
3859 // very counter-productive if it did that.
:-) 3860 _draining_satb_buffers = true; 3861 3862 CMObjectClosure oc(this); 3863 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3864 if (G1CollectedHeap::use_parallel_gc_threads()) { 3865 satb_mq_set.set_par_closure(_worker_id, &oc); 3866 } else { 3867 satb_mq_set.set_closure(&oc); 3868 } 3869 3870 // This keeps claiming and applying the closure to completed buffers 3871 // until we run out of buffers or we need to abort. 3872 if (G1CollectedHeap::use_parallel_gc_threads()) { 3873 while (!has_aborted() && 3874 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3875 if (_cm->verbose_medium()) { 3876 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3877 } 3878 statsOnly( ++_satb_buffers_processed ); 3879 regular_clock_call(); 3880 } 3881 } else { 3882 while (!has_aborted() && 3883 satb_mq_set.apply_closure_to_completed_buffer()) { 3884 if (_cm->verbose_medium()) { 3885 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3886 } 3887 statsOnly( ++_satb_buffers_processed ); 3888 regular_clock_call(); 3889 } 3890 } 3891 3892 if (!concurrent() && !has_aborted()) { 3893 // We should only do this during remark. 3894 if (G1CollectedHeap::use_parallel_gc_threads()) { 3895 satb_mq_set.par_iterate_closure_all_threads(_worker_id); 3896 } else { 3897 satb_mq_set.iterate_closure_all_threads(); 3898 } 3899 } 3900 3901 _draining_satb_buffers = false; 3902 3903 assert(has_aborted() || 3904 concurrent() || 3905 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3906 3907 if (G1CollectedHeap::use_parallel_gc_threads()) { 3908 satb_mq_set.set_par_closure(_worker_id, NULL); 3909 } else { 3910 satb_mq_set.set_closure(NULL); 3911 } 3912 3913 // again, this was a potentially expensive operation, decrease the 3914 // limits to get the regular clock call early 3915 decrease_limits(); 3916 } 3917 3918 void CMTask::print_stats() { 3919 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3920 _worker_id, _calls); 3921 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3922 _elapsed_time_ms, _termination_time_ms); 3923 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3924 _step_times_ms.num(), _step_times_ms.avg(), 3925 _step_times_ms.sd()); 3926 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3927 _step_times_ms.maximum(), _step_times_ms.sum()); 3928 3929 #if _MARKING_STATS_ 3930 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3931 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3932 _all_clock_intervals_ms.sd()); 3933 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3934 _all_clock_intervals_ms.maximum(), 3935 _all_clock_intervals_ms.sum()); 3936 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3937 _clock_due_to_scanning, _clock_due_to_marking); 3938 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3939 _objs_scanned, _objs_found_on_bitmap); 3940 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3941 _local_pushes, _local_pops, _local_max_size); 3942 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3943 _global_pushes, _global_pops, _global_max_size); 3944 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3945 _global_transfers_to,_global_transfers_from); 3946 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3947 gclog_or_tty->print_cr(" SATB 
buffers: processed = %d", _satb_buffers_processed);
3948 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3949 _steal_attempts, _steals);
3950 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3951 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3952 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3953 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3954 _aborted_timed_out, _aborted_satb, _aborted_termination);
3955 #endif // _MARKING_STATS_
3956 }
3957
3958 /*****************************************************************************
3959
3960 The do_marking_step(time_target_ms, ...) method is the building
3961 block of the parallel marking framework. It can be called in parallel
3962 with other invocations of do_marking_step() on different tasks
3963 (but only one per task, obviously) and concurrently with the
3964 mutator threads, or during remark, hence it eliminates the need
3965 for two versions of the code. When called during remark, it will
3966 pick up from where the task left off during the concurrent marking
3967 phase. Interestingly, tasks are also claimable during evacuation
3968 pauses, since do_marking_step() ensures that it aborts before
3969 it needs to yield.
3970
3971 The data structures that it uses to do marking work are the
3972 following:
3973
3974 (1) Marking Bitmap. If there are gray objects that appear only
3975 on the bitmap (this happens either when dealing with an overflow
3976 or when the initial marking phase has simply marked the roots
3977 and didn't push them on the stack), then tasks claim heap
3978 regions whose bitmap they then scan to find gray objects. A
3979 global finger indicates where the end of the last claimed region
3980 is. A local finger indicates how far into the region a task has
3981 scanned. The two fingers are used to determine how to gray an
3982 object (i.e. whether simply marking it is OK, as it will be
3983 visited by a task in the future, or whether it also needs to be
3984 pushed on a stack).
3985
3986 (2) Local Queue. The local queue of the task, which is accessed
3987 reasonably efficiently by the task. Other tasks can steal from
3988 it when they run out of work. Throughout the marking phase, a
3989 task attempts to keep its local queue short but not totally
3990 empty, so that entries are available for stealing by other
3991 tasks. Only when there is no more work will a task totally
3992 drain its local queue.
3993
3994 (3) Global Mark Stack. This handles local queue overflow. During
3995 marking, only sets of entries are moved between it and the local
3996 queues, as access to it requires a mutex and more fine-grained
3997 interaction with it might cause contention. If it
3998 overflows, then the marking phase should restart and iterate
3999 over the bitmap to identify gray objects. Throughout the marking
4000 phase, tasks attempt to keep the global mark stack at a small
4001 length but not totally empty, so that entries are available for
4002 popping by other tasks. Only when there is no more work will
4003 tasks totally drain the global mark stack.
4004
4005 (4) SATB Buffer Queue. This is where completed SATB buffers are
4006 made available. Buffers are regularly removed from this queue
4007 and scanned for roots, so that the queue doesn't get too
4008 long. During remark, all completed buffers are processed, as
4009 well as the filled-in parts of any uncompleted buffers.
4010
4011 The do_marking_step() method tries to abort when the time target
4012 has been reached. There are a few other cases when the
4013 do_marking_step() method also aborts:
4014
4015 (1) When the marking phase has been aborted (after a Full GC).
4016
4017 (2) When a global overflow (on the global stack) has been
4018 triggered. Before the task aborts, it will actually sync up with
4019 the other tasks to ensure that all the marking data structures
4020 (local queues, stacks, fingers etc.) are re-initialized so that
4021 when do_marking_step() completes, the marking phase can
4022 immediately restart.
4023
4024 (3) When enough completed SATB buffers are available. The
4025 do_marking_step() method only tries to drain SATB buffers right
4026 at the beginning. So, if enough buffers are available, the
4027 marking step aborts and the SATB buffers are processed at
4028 the beginning of the next invocation.
4029
4030 (4) To yield. When we have to yield, we abort and yield
4031 right at the end of do_marking_step(). This saves us from a lot
4032 of hassle as, by yielding, we might allow a Full GC. If this
4033 happens then objects will be compacted underneath our feet, the
4034 heap might shrink, etc. We save checking for this by just
4035 aborting and doing the yield right at the end.
4036
4037 From the above it follows that the do_marking_step() method should
4038 be called in a loop (or, otherwise, regularly) until it completes.
4039
4040 If a marking step completes without its has_aborted() flag being
4041 true, it means it has completed the current marking phase (and
4042 also all other marking tasks have done so and have all synced up).
4043
4044 A method called regular_clock_call() is invoked "regularly" (at
4045 sub-ms intervals) throughout marking. It is this clock method that
4046 checks all the abort conditions which were mentioned above and
4047 decides when the task should abort. A work-based scheme is used to
4048 trigger this clock method: when the number of object words the
4049 marking phase has scanned or the number of references the marking
4050 phase has visited reaches a given limit. Additional invocations of
4051 the clock method have been planted in a few other strategic places
4052 too. The initial reason for the clock method was to avoid calling
4053 vtime too regularly, as it is quite expensive. So, once it was in
4054 place, it was natural to piggy-back all the other conditions on it
4055 too and not constantly check them throughout the code.
4056
4057 If do_termination is true then do_marking_step will enter its
4058 termination protocol.
4059
4060 The value of is_serial must be true when do_marking_step is being
4061 called serially (i.e. by the VMThread) and do_marking_step should
4062 skip any synchronization in the termination and overflow code.
4063 Examples include the serial remark code and the serial reference
4064 processing closures.
4065
4066 The value of is_serial must be false when do_marking_step is
4067 being called by any of the worker threads in a work gang.
4068 Examples include the concurrent marking code (CMConcurrentMarkingTask),
4069 the MT remark code, and the MT reference processing closures.
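A minimal sketch of the expected calling pattern during the
concurrent phase (illustrative only; see CMConcurrentMarkingTask
for the real loop, which also tracks elapsed vtime):

  do {
    task->do_marking_step(G1ConcMarkStepDurationMillis,
                          true,    // do_termination
                          false);  // is_serial
  } while (!cm->has_aborted() && task->has_aborted());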
4070
4071 *****************************************************************************/
4072
4073 void CMTask::do_marking_step(double time_target_ms,
4074 bool do_termination,
4075 bool is_serial) {
4076 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4077 assert(concurrent() == _cm->concurrent(), "they should be the same");
4078
4079 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4080 assert(_task_queues != NULL, "invariant");
4081 assert(_task_queue != NULL, "invariant");
4082 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4083
4084 assert(!_claimed,
4085 "only one thread should claim this task at any one time");
4086
4087 // OK, this doesn't safeguard against all possible scenarios, as it is
4088 // possible for two threads to set the _claimed flag at the same
4089 // time. But it is only for debugging purposes anyway and it will
4090 // catch most problems.
4091 _claimed = true;
4092
4093 _start_time_ms = os::elapsedVTime() * 1000.0;
4094 statsOnly( _interval_start_time_ms = _start_time_ms );
4095
4096 // If do_stealing is true then do_marking_step will attempt to
4097 // steal work from the other CMTasks. It only makes sense to
4098 // enable stealing when the termination protocol is enabled
4099 // and do_marking_step() is not being called serially.
4100 bool do_stealing = do_termination && !is_serial;
4101
4102 double diff_prediction_ms =
4103 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4104 _time_target_ms = time_target_ms - diff_prediction_ms;
4105
4106 // set up the variables that are used in the work-based scheme to
4107 // call the regular clock method
4108 _words_scanned = 0;
4109 _refs_reached = 0;
4110 recalculate_limits();
4111
4112 // clear all flags
4113 clear_has_aborted();
4114 _has_timed_out = false;
4115 _draining_satb_buffers = false;
4116
4117 ++_calls;
4118
4119 if (_cm->verbose_low()) {
4120 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4121 "target = %1.2lfms >>>>>>>>>>",
4122 _worker_id, _calls, _time_target_ms);
4123 }
4124
4125 // Set up the bitmap and oop closures. Anything that uses them is
4126 // eventually called from this method, so it is OK to allocate these
4127 // statically.
4128 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4129 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4130 set_cm_oop_closure(&cm_oop_closure);
4131
4132 if (_cm->has_overflown()) {
4133 // This can happen if the mark stack overflows during a GC pause
4134 // and this task, after a yield point, restarts. We have to abort
4135 // as we need to get into the overflow protocol which happens
4136 // right at the end of this task.
4137 set_has_aborted();
4138 }
4139
4140 // First drain any available SATB buffers. After this, we will not
4141 // look at SATB buffers before the next invocation of this method.
4142 // If enough completed SATB buffers are queued up, the regular clock
4143 // will abort this task so that it restarts.
4144 drain_satb_buffers();
4145 // ...then partially drain the local queue and the global stack
4146 drain_local_queue(true);
4147 drain_global_stack(true);
4148
4149 do {
4150 if (!has_aborted() && _curr_region != NULL) {
4151 // This means that we're already holding on to a region.
4152 assert(_finger != NULL, "if region is not NULL, then the finger "
4153 "should not be NULL either");
4154
4155 // We might have restarted this task after an evacuation pause
4156 // which might have evacuated the region we're holding on to
4157 // underneath our feet.
Let's read its limit again to make sure 4158 // that we do not iterate over a region of the heap that 4159 // contains garbage (update_region_limit() will also move 4160 // _finger to the start of the region if it is found empty). 4161 update_region_limit(); 4162 // We will start from _finger not from the start of the region, 4163 // as we might be restarting this task after aborting half-way 4164 // through scanning this region. In this case, _finger points to 4165 // the address where we last found a marked object. If this is a 4166 // fresh region, _finger points to start(). 4167 MemRegion mr = MemRegion(_finger, _region_limit); 4168 4169 if (_cm->verbose_low()) { 4170 gclog_or_tty->print_cr("[%u] we're scanning part " 4171 "["PTR_FORMAT", "PTR_FORMAT") " 4172 "of region "HR_FORMAT, 4173 _worker_id, _finger, _region_limit, 4174 HR_FORMAT_PARAMS(_curr_region)); 4175 } 4176 4177 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4178 "humongous regions should go around loop once only"); 4179 4180 // Some special cases: 4181 // If the memory region is empty, we can just give up the region. 4182 // If the current region is humongous then we only need to check 4183 // the bitmap for the bit associated with the start of the object, 4184 // scan the object if it's live, and give up the region. 4185 // Otherwise, let's iterate over the bitmap of the part of the region 4186 // that is left. 4187 // If the iteration is successful, give up the region. 4188 if (mr.is_empty()) { 4189 giveup_current_region(); 4190 regular_clock_call(); 4191 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4192 if (_nextMarkBitMap->isMarked(mr.start())) { 4193 // The object is marked - apply the closure 4194 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4195 bitmap_closure.do_bit(offset); 4196 } 4197 // Even if this task aborted while scanning the humongous object 4198 // we can (and should) give up the current region. 4199 giveup_current_region(); 4200 regular_clock_call(); 4201 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4202 giveup_current_region(); 4203 regular_clock_call(); 4204 } else { 4205 assert(has_aborted(), "currently the only way to do so"); 4206 // The only way to abort the bitmap iteration is to return 4207 // false from the do_bit() method. However, inside the 4208 // do_bit() method we move the _finger to point to the 4209 // object currently being looked at. So, if we bail out, we 4210 // have definitely set _finger to something non-null. 4211 assert(_finger != NULL, "invariant"); 4212 4213 // Region iteration was actually aborted. So now _finger 4214 // points to the address of the object we last scanned. If we 4215 // leave it there, when we restart this task, we will rescan 4216 // the object. It is easy to avoid this. We move the finger by 4217 // enough to point to the next possible object header (the 4218 // bitmap knows by how much we need to move it as it knows its 4219 // granularity). 4220 assert(_finger < _region_limit, "invariant"); 4221 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4222 // Check if bitmap iteration was aborted while scanning the last object 4223 if (new_finger >= _region_limit) { 4224 giveup_current_region(); 4225 } else { 4226 move_finger_to(new_finger); 4227 } 4228 } 4229 } 4230 // At this point we have either completed iterating over the 4231 // region we were holding on to, or we have aborted. 
4232
4233 // We then partially drain the local queue and the global stack.
4234 // (Do we really need this?)
4235 drain_local_queue(true);
4236 drain_global_stack(true);
4237
4238 // Read the note on the claim_region() method on why it might
4239 // return NULL with potentially more regions available for
4240 // claiming and why we have to check out_of_regions() to determine
4241 // whether we're done or not.
4242 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4243 // We are going to try to claim a new region. We should have
4244 // given up on the previous one.
4245 // Separated the asserts so that we know which one fires.
4246 assert(_curr_region == NULL, "invariant");
4247 assert(_finger == NULL, "invariant");
4248 assert(_region_limit == NULL, "invariant");
4249 if (_cm->verbose_low()) {
4250 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4251 }
4252 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4253 if (claimed_region != NULL) {
4254 // Yes, we managed to claim one
4255 statsOnly( ++_regions_claimed );
4256
4257 if (_cm->verbose_low()) {
4258 gclog_or_tty->print_cr("[%u] we successfully claimed "
4259 "region "PTR_FORMAT,
4260 _worker_id, claimed_region);
4261 }
4262
4263 setup_for_region(claimed_region);
4264 assert(_curr_region == claimed_region, "invariant");
4265 }
4266 // It is important to call the regular clock here. It might take
4267 // a while to claim a region if, for example, we hit a large
4268 // block of empty regions. So we need to call the regular clock
4269 // method once round the loop to make sure it's called
4270 // frequently enough.
4271 regular_clock_call();
4272 }
4273
4274 if (!has_aborted() && _curr_region == NULL) {
4275 assert(_cm->out_of_regions(),
4276 "at this point we should be out of regions");
4277 }
4278 } while ( _curr_region != NULL && !has_aborted());
4279
4280 if (!has_aborted()) {
4281 // We cannot check whether the global stack is empty, since other
4282 // tasks might be pushing objects to it concurrently.
4283 assert(_cm->out_of_regions(),
4284 "at this point we should be out of regions");
4285
4286 if (_cm->verbose_low()) {
4287 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4288 }
4289
4290 // Try to reduce the number of available SATB buffers so that
4291 // remark has less work to do.
4292 drain_satb_buffers();
4293 }
4294
4295 // Since we've done everything else, we can now totally drain the
4296 // local queue and global stack.
4297 drain_local_queue(false);
4298 drain_global_stack(false);
4299
4300 // Attempt at work stealing from other tasks' queues.
4301 if (do_stealing && !has_aborted()) {
4302 // We have not aborted. This means that we have finished all that
4303 // we could. Let's try to do some stealing...
4304
4305 // We cannot check whether the global stack is empty, since other
4306 // tasks might be pushing objects to it concurrently.
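// That is why the assert below covers only the regions and the
// local queue.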
4307 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4308 "only way to reach here");
4309
4310 if (_cm->verbose_low()) {
4311 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4312 }
4313
4314 while (!has_aborted()) {
4315 oop obj;
4316 statsOnly( ++_steal_attempts );
4317
4318 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4319 if (_cm->verbose_medium()) {
4320 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4321 _worker_id, (void*) obj);
4322 }
4323
4324 statsOnly( ++_steals );
4325
4326 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4327 "any stolen object should be marked");
4328 scan_object(obj);
4329
4330 // And since we're towards the end, let's totally drain the
4331 // local queue and global stack.
4332 drain_local_queue(false);
4333 drain_global_stack(false);
4334 } else {
4335 break;
4336 }
4337 }
4338 }
4339
4340 // If we are about to wrap up and go into termination, check if we
4341 // should raise the overflow flag.
4342 if (do_termination && !has_aborted()) {
4343 if (_cm->force_overflow()->should_force()) {
4344 _cm->set_has_overflown();
4345 regular_clock_call();
4346 }
4347 }
4348
4349 // We still haven't aborted. Now, let's try to get into the
4350 // termination protocol.
4351 if (do_termination && !has_aborted()) {
4352 // We cannot check whether the global stack is empty, since other
4353 // tasks might be concurrently pushing objects on it.
4354 // Separated the asserts so that we know which one fires.
4355 assert(_cm->out_of_regions(), "only way to reach here");
4356 assert(_task_queue->size() == 0, "only way to reach here");
4357
4358 if (_cm->verbose_low()) {
4359 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4360 }
4361
4362 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4363
4364 // The CMTask class also extends the TerminatorTerminator class,
4365 // hence its should_exit_termination() method will also decide
4366 // whether to exit the termination protocol or not.
4367 bool finished = (is_serial ||
4368 _cm->terminator()->offer_termination(this));
4369 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4370 _termination_time_ms +=
4371 termination_end_time_ms - _termination_start_time_ms;
4372
4373 if (finished) {
4374 // We're all done.
4375
4376 if (_worker_id == 0) {
4377 // let's allow task 0 to do this
4378 if (concurrent()) {
4379 assert(_cm->concurrent_marking_in_progress(), "invariant");
4380 // we need to set this to false before the next
4381 // safepoint. This way we ensure that the marking phase
4382 // doesn't observe any more heap expansions.
4383 _cm->clear_concurrent_marking_in_progress();
4384 }
4385 }
4386
4387 // We can now guarantee that the global stack is empty, since
4388 // all other tasks have finished. We separated the guarantees so
4389 // that, if a condition is false, we can immediately find out
4390 // which one.
4391 guarantee(_cm->out_of_regions(), "only way to reach here");
4392 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4393 guarantee(_task_queue->size() == 0, "only way to reach here");
4394 guarantee(!_cm->has_overflown(), "only way to reach here");
4395 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4396
4397 if (_cm->verbose_low()) {
4398 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4399 }
4400 } else {
4401 // Apparently there's more work to do. Let's abort this task. The
4402 // caller will restart it and we can hopefully find more things to do.
4403
4404 if (_cm->verbose_low()) {
4405 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4406 _worker_id);
4407 }
4408
4409 set_has_aborted();
4410 statsOnly( ++_aborted_termination );
4411 }
4412 }
4413
4414 // Mainly for debugging purposes to make sure that a pointer to the
4415 // closure which was statically allocated in this frame doesn't
4416 // escape it by accident.
4417 set_cm_oop_closure(NULL);
4418 double end_time_ms = os::elapsedVTime() * 1000.0;
4419 double elapsed_time_ms = end_time_ms - _start_time_ms;
4420 // Update the step history.
4421 _step_times_ms.add(elapsed_time_ms);
4422
4423 if (has_aborted()) {
4424 // The task was aborted for some reason.
4425
4426 statsOnly( ++_aborted );
4427
4428 if (_has_timed_out) {
4429 double diff_ms = elapsed_time_ms - _time_target_ms;
4430 // Keep statistics of how well we did with respect to hitting
4431 // our target only if we actually timed out (if we aborted for
4432 // other reasons, then the results might get skewed).
4433 _marking_step_diffs_ms.add(diff_ms);
4434 }
4435
4436 if (_cm->has_overflown()) {
4437 // This is the interesting one. We aborted because a global
4438 // overflow was raised. This means we have to restart the
4439 // marking phase and start iterating over regions. However, in
4440 // order to do this we have to make sure that all tasks stop
4441 // what they are doing and re-initialize in a safe manner. We
4442 // will achieve this with the use of two barrier sync points.
4443
4444 if (_cm->verbose_low()) {
4445 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4446 }
4447
4448 if (!is_serial) {
4449 // We only need to enter the sync barrier if being called
4450 // from a parallel context
4451 _cm->enter_first_sync_barrier(_worker_id);
4452
4453 // When we exit this sync barrier we know that all tasks have
4454 // stopped doing marking work. So, it's now safe to
4455 // re-initialize our data structures. At the end of this method,
4456 // task 0 will clear the global data structures.
4457 }
4458
4459 statsOnly( ++_aborted_overflow );
4460
4461 // We clear the local state of this task...
4462 clear_region_fields();
4463
4464 if (!is_serial) {
4465 // ...and enter the second barrier.
4466 _cm->enter_second_sync_barrier(_worker_id);
4467 }
4468 // At this point, if we're in the concurrent phase of
4469 // marking, everything has been re-initialized and we're
4470 // ready to restart.
4471 } 4472 4473 if (_cm->verbose_low()) { 4474 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4475 "elapsed = %1.2lfms <<<<<<<<<<", 4476 _worker_id, _time_target_ms, elapsed_time_ms); 4477 if (_cm->has_aborted()) { 4478 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4479 _worker_id); 4480 } 4481 } 4482 } else { 4483 if (_cm->verbose_low()) { 4484 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4485 "elapsed = %1.2lfms <<<<<<<<<<", 4486 _worker_id, _time_target_ms, elapsed_time_ms); 4487 } 4488 } 4489 4490 _claimed = false; 4491 } 4492 4493 CMTask::CMTask(uint worker_id, 4494 ConcurrentMark* cm, 4495 size_t* marked_bytes, 4496 BitMap* card_bm, 4497 CMTaskQueue* task_queue, 4498 CMTaskQueueSet* task_queues) 4499 : _g1h(G1CollectedHeap::heap()), 4500 _worker_id(worker_id), _cm(cm), 4501 _claimed(false), 4502 _nextMarkBitMap(NULL), _hash_seed(17), 4503 _task_queue(task_queue), 4504 _task_queues(task_queues), 4505 _cm_oop_closure(NULL), 4506 _marked_bytes_array(marked_bytes), 4507 _card_bm(card_bm) { 4508 guarantee(task_queue != NULL, "invariant"); 4509 guarantee(task_queues != NULL, "invariant"); 4510 4511 statsOnly( _clock_due_to_scanning = 0; 4512 _clock_due_to_marking = 0 ); 4513 4514 _marking_step_diffs_ms.add(0.5); 4515 } 4516 4517 // These are formatting macros that are used below to ensure 4518 // consistent formatting. The *_H_* versions are used to format the 4519 // header for a particular value and they should be kept consistent 4520 // with the corresponding macro. Also note that most of the macros add 4521 // the necessary white space (as a prefix) which makes them a bit 4522 // easier to compose. 4523 4524 // All the output lines are prefixed with this string to be able to 4525 // identify them easily in a large log file. 4526 #define G1PPRL_LINE_PREFIX "###" 4527 4528 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4529 #ifdef _LP64 4530 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4531 #else // _LP64 4532 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4533 #endif // _LP64 4534 4535 // For per-region info 4536 #define G1PPRL_TYPE_FORMAT " %-4s" 4537 #define G1PPRL_TYPE_H_FORMAT " %4s" 4538 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4539 #define G1PPRL_BYTE_H_FORMAT " %9s" 4540 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4541 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4542 4543 // For summary info 4544 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4545 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4546 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4547 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4548 4549 G1PrintRegionLivenessInfoClosure:: 4550 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4551 : _out(out), 4552 _total_used_bytes(0), _total_capacity_bytes(0), 4553 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4554 _hum_used_bytes(0), _hum_capacity_bytes(0), 4555 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4556 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4557 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4558 MemRegion g1_committed = g1h->g1_committed(); 4559 MemRegion g1_reserved = g1h->g1_reserved(); 4560 double now = os::elapsedTime(); 4561 4562 // Print the header of the output. 
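// For example, the first two header lines look something like this
// (values are illustrative):
//   ### PHASE Post-Marking @ 0.521
//   ### HEAP committed: 0x...-0x... reserved: 0x...-0x... region-size: 1048576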
4563 _out->cr();
4564 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4565 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4566 G1PPRL_SUM_ADDR_FORMAT("committed")
4567 G1PPRL_SUM_ADDR_FORMAT("reserved")
4568 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4569 g1_committed.start(), g1_committed.end(),
4570 g1_reserved.start(), g1_reserved.end(),
4571 HeapRegion::GrainBytes);
4572 _out->print_cr(G1PPRL_LINE_PREFIX);
4573 _out->print_cr(G1PPRL_LINE_PREFIX
4574 G1PPRL_TYPE_H_FORMAT
4575 G1PPRL_ADDR_BASE_H_FORMAT
4576 G1PPRL_BYTE_H_FORMAT
4577 G1PPRL_BYTE_H_FORMAT
4578 G1PPRL_BYTE_H_FORMAT
4579 G1PPRL_DOUBLE_H_FORMAT
4580 G1PPRL_BYTE_H_FORMAT
4581 G1PPRL_BYTE_H_FORMAT,
4582 "type", "address-range",
4583 "used", "prev-live", "next-live", "gc-eff",
4584 "remset", "code-roots");
4585 _out->print_cr(G1PPRL_LINE_PREFIX
4586 G1PPRL_TYPE_H_FORMAT
4587 G1PPRL_ADDR_BASE_H_FORMAT
4588 G1PPRL_BYTE_H_FORMAT
4589 G1PPRL_BYTE_H_FORMAT
4590 G1PPRL_BYTE_H_FORMAT
4591 G1PPRL_DOUBLE_H_FORMAT
4592 G1PPRL_BYTE_H_FORMAT
4593 G1PPRL_BYTE_H_FORMAT,
4594 "", "",
4595 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4596 "(bytes)", "(bytes)");
4597 }
4598
4599 // It takes as a parameter a reference to one of the _hum_* fields,
4600 // deduces the corresponding value for a region in a humongous region
4601 // series (either the region size, or what's left if the _hum_* field
4602 // is < the region size), and updates the _hum_* field accordingly.
4603 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4604 size_t bytes = 0;
4605 // The > 0 check is to deal with the prev and next live bytes which
4606 // could be 0.
4607 if (*hum_bytes > 0) {
4608 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4609 *hum_bytes -= bytes;
4610 }
4611 return bytes;
4612 }
4613
4614 // It deduces the values for a region in a humongous region series
4615 // from the _hum_* fields and updates those accordingly. It assumes
4616 // that the _hum_* fields have already been set up from the "starts
4617 // humongous" region and that we visit the regions in address order.
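// For example (illustrative): if the series totals were set up with
// 2.5 * GrainBytes of used space, successive calls attribute
// GrainBytes to each of the first two regions and 0.5 * GrainBytes
// to the third, leaving the _hum_used_bytes field at zero.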
4618 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4619 size_t* capacity_bytes, 4620 size_t* prev_live_bytes, 4621 size_t* next_live_bytes) { 4622 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4623 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4624 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4625 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4626 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4627 } 4628 4629 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4630 const char* type = ""; 4631 HeapWord* bottom = r->bottom(); 4632 HeapWord* end = r->end(); 4633 size_t capacity_bytes = r->capacity(); 4634 size_t used_bytes = r->used(); 4635 size_t prev_live_bytes = r->live_bytes(); 4636 size_t next_live_bytes = r->next_live_bytes(); 4637 double gc_eff = r->gc_efficiency(); 4638 size_t remset_bytes = r->rem_set()->mem_size(); 4639 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 4640 4641 if (r->used() == 0) { 4642 type = "FREE"; 4643 } else if (r->is_survivor()) { 4644 type = "SURV"; 4645 } else if (r->is_young()) { 4646 type = "EDEN"; 4647 } else if (r->startsHumongous()) { 4648 type = "HUMS"; 4649 4650 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4651 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4652 "they should have been zeroed after the last time we used them"); 4653 // Set up the _hum_* fields. 4654 _hum_capacity_bytes = capacity_bytes; 4655 _hum_used_bytes = used_bytes; 4656 _hum_prev_live_bytes = prev_live_bytes; 4657 _hum_next_live_bytes = next_live_bytes; 4658 get_hum_bytes(&used_bytes, &capacity_bytes, 4659 &prev_live_bytes, &next_live_bytes); 4660 end = bottom + HeapRegion::GrainWords; 4661 } else if (r->continuesHumongous()) { 4662 type = "HUMC"; 4663 get_hum_bytes(&used_bytes, &capacity_bytes, 4664 &prev_live_bytes, &next_live_bytes); 4665 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4666 } else { 4667 type = "OLD"; 4668 } 4669 4670 _total_used_bytes += used_bytes; 4671 _total_capacity_bytes += capacity_bytes; 4672 _total_prev_live_bytes += prev_live_bytes; 4673 _total_next_live_bytes += next_live_bytes; 4674 _total_remset_bytes += remset_bytes; 4675 _total_strong_code_roots_bytes += strong_code_roots_bytes; 4676 4677 // Print a line for this particular region. 4678 _out->print_cr(G1PPRL_LINE_PREFIX 4679 G1PPRL_TYPE_FORMAT 4680 G1PPRL_ADDR_BASE_FORMAT 4681 G1PPRL_BYTE_FORMAT 4682 G1PPRL_BYTE_FORMAT 4683 G1PPRL_BYTE_FORMAT 4684 G1PPRL_DOUBLE_FORMAT 4685 G1PPRL_BYTE_FORMAT 4686 G1PPRL_BYTE_FORMAT, 4687 type, bottom, end, 4688 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 4689 remset_bytes, strong_code_roots_bytes); 4690 4691 return false; 4692 } 4693 4694 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4695 // add static memory usages to remembered set sizes 4696 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 4697 // Print the footer of the output. 
4698 _out->print_cr(G1PPRL_LINE_PREFIX); 4699 _out->print_cr(G1PPRL_LINE_PREFIX 4700 " SUMMARY" 4701 G1PPRL_SUM_MB_FORMAT("capacity") 4702 G1PPRL_SUM_MB_PERC_FORMAT("used") 4703 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4704 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4705 G1PPRL_SUM_MB_FORMAT("remset") 4706 G1PPRL_SUM_MB_FORMAT("code-roots"), 4707 bytes_to_mb(_total_capacity_bytes), 4708 bytes_to_mb(_total_used_bytes), 4709 perc(_total_used_bytes, _total_capacity_bytes), 4710 bytes_to_mb(_total_prev_live_bytes), 4711 perc(_total_prev_live_bytes, _total_capacity_bytes), 4712 bytes_to_mb(_total_next_live_bytes), 4713 perc(_total_next_live_bytes, _total_capacity_bytes), 4714 bytes_to_mb(_total_remset_bytes), 4715 bytes_to_mb(_total_strong_code_roots_bytes)); 4716 _out->cr(); 4717 }
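// A minimal usage sketch for the closure above (illustrative; it
// mirrors how the cleanup pause prints region liveness when
// G1PrintRegionLivenessInfo is enabled):
//
//   if (G1PrintRegionLivenessInfo) {
//     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//     _g1h->heap_region_iterate(&cl);
//   }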