1 /* 2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/symbolTable.hpp" 27 #include "code/codeCache.hpp" 28 #include "gc_implementation/g1/concurrentMark.inline.hpp" 29 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" 30 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 31 #include "gc_implementation/g1/g1CollectorPolicy.hpp" 32 #include "gc_implementation/g1/g1ErgoVerbose.hpp" 33 #include "gc_implementation/g1/g1Log.hpp" 34 #include "gc_implementation/g1/g1OopClosures.inline.hpp" 35 #include "gc_implementation/g1/g1RemSet.hpp" 36 #include "gc_implementation/g1/heapRegion.inline.hpp" 37 #include "gc_implementation/g1/heapRegionRemSet.hpp" 38 #include "gc_implementation/g1/heapRegionSeq.inline.hpp" 39 #include "gc_implementation/shared/vmGCOperations.hpp" 40 #include "gc_implementation/shared/gcTimer.hpp" 41 #include "gc_implementation/shared/gcTrace.hpp" 42 #include "gc_implementation/shared/gcTraceTime.hpp" 43 #include "memory/allocation.hpp" 44 #include "memory/genOopClosures.inline.hpp" 45 #include "memory/referencePolicy.hpp" 46 #include "memory/resourceArea.hpp" 47 #include "oops/oop.inline.hpp" 48 #include "runtime/handles.inline.hpp" 49 #include "runtime/java.hpp" 50 #include "runtime/atomic.inline.hpp" 51 #include "runtime/prefetch.inline.hpp" 52 #include "services/memTracker.hpp" 53 54 // Concurrent marking bit map wrapper 55 56 CMBitMapRO::CMBitMapRO(int shifter) : 57 _bm(), 58 _shifter(shifter) { 59 _bmStartWord = 0; 60 _bmWordSize = 0; 61 } 62 63 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr, 64 const HeapWord* limit) const { 65 // First we must round addr *up* to a possible object boundary. 
66 addr = (HeapWord*)align_size_up((intptr_t)addr, 67 HeapWordSize << _shifter); 68 size_t addrOffset = heapWordToOffset(addr); 69 if (limit == NULL) { 70 limit = _bmStartWord + _bmWordSize; 71 } 72 size_t limitOffset = heapWordToOffset(limit); 73 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 74 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 75 assert(nextAddr >= addr, "get_next_one postcondition"); 76 assert(nextAddr == limit || isMarked(nextAddr), 77 "get_next_one postcondition"); 78 return nextAddr; 79 } 80 81 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr, 82 const HeapWord* limit) const { 83 size_t addrOffset = heapWordToOffset(addr); 84 if (limit == NULL) { 85 limit = _bmStartWord + _bmWordSize; 86 } 87 size_t limitOffset = heapWordToOffset(limit); 88 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); 89 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 90 assert(nextAddr >= addr, "get_next_one postcondition"); 91 assert(nextAddr == limit || !isMarked(nextAddr), 92 "get_next_one postcondition"); 93 return nextAddr; 94 } 95 96 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { 97 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); 98 return (int) (diff >> _shifter); 99 } 100 101 #ifndef PRODUCT 102 bool CMBitMapRO::covers(ReservedSpace heap_rs) const { 103 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 104 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize, 105 "size inconsistency"); 106 return _bmStartWord == (HeapWord*)(heap_rs.base()) && 107 _bmWordSize == heap_rs.size()>>LogHeapWordSize; 108 } 109 #endif 110 111 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const { 112 _bm.print_on_error(st, prefix); 113 } 114 115 bool CMBitMap::allocate(ReservedSpace heap_rs) { 116 _bmStartWord = (HeapWord*)(heap_rs.base()); 117 _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes 118 ReservedSpace 
brs(ReservedSpace::allocation_align_size_up( 119 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); 120 if (!brs.is_reserved()) { 121 warning("ConcurrentMark marking bit map allocation failure"); 122 return false; 123 } 124 MemTracker::record_virtual_memory_type((address)brs.base(), mtGC); 125 // For now we'll just commit all of the bit map up front. 126 // Later on we'll try to be more parsimonious with swap. 127 if (!_virtual_space.initialize(brs, brs.size())) { 128 warning("ConcurrentMark marking bit map backing store failure"); 129 return false; 130 } 131 assert(_virtual_space.committed_size() == brs.size(), 132 "didn't reserve backing store for all of concurrent marking bit map?"); 133 _bm.set_map((BitMap::bm_word_t*)_virtual_space.low()); 134 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= 135 _bmWordSize, "inconsistency in bit map sizing"); 136 _bm.set_size(_bmWordSize >> _shifter); 137 return true; 138 } 139 140 void CMBitMap::clearAll() { 141 _bm.clear(); 142 return; 143 } 144 145 void CMBitMap::markRange(MemRegion mr) { 146 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 147 assert(!mr.is_empty(), "unexpected empty region"); 148 assert((offsetToHeapWord(heapWordToOffset(mr.end())) == 149 ((HeapWord *) mr.end())), 150 "markRange memory region end is not card aligned"); 151 // convert address range into offset range 152 _bm.at_put_range(heapWordToOffset(mr.start()), 153 heapWordToOffset(mr.end()), true); 154 } 155 156 void CMBitMap::clearRange(MemRegion mr) { 157 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 158 assert(!mr.is_empty(), "unexpected empty region"); 159 // convert address range into offset range 160 _bm.at_put_range(heapWordToOffset(mr.start()), 161 heapWordToOffset(mr.end()), false); 162 } 163 164 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, 165 HeapWord* end_addr) { 166 HeapWord* start = getNextMarkedWordAddress(addr); 167 start = MIN2(start, end_addr); 168 HeapWord* end = 
getNextUnmarkedWordAddress(start); 169 end = MIN2(end, end_addr); 170 assert(start <= end, "Consistency check"); 171 MemRegion mr(start, end); 172 if (!mr.is_empty()) { 173 clearRange(mr); 174 } 175 return mr; 176 } 177 178 CMMarkStack::CMMarkStack(ConcurrentMark* cm) : 179 _base(NULL), _cm(cm) 180 #ifdef ASSERT 181 , _drain_in_progress(false) 182 , _drain_in_progress_yields(false) 183 #endif 184 {} 185 186 bool CMMarkStack::allocate(size_t capacity) { 187 // allocate a stack of the requisite depth 188 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop))); 189 if (!rs.is_reserved()) { 190 warning("ConcurrentMark MarkStack allocation failure"); 191 return false; 192 } 193 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC); 194 if (!_virtual_space.initialize(rs, rs.size())) { 195 warning("ConcurrentMark MarkStack backing store failure"); 196 // Release the virtual memory reserved for the marking stack 197 rs.release(); 198 return false; 199 } 200 assert(_virtual_space.committed_size() == rs.size(), 201 "Didn't reserve backing store for all of ConcurrentMark stack?"); 202 _base = (oop*) _virtual_space.low(); 203 setEmpty(); 204 _capacity = (jint) capacity; 205 _saved_index = -1; 206 _should_expand = false; 207 NOT_PRODUCT(_max_depth = 0); 208 return true; 209 } 210 211 void CMMarkStack::expand() { 212 // Called, during remark, if we've overflown the marking stack during marking. 
213 assert(isEmpty(), "stack should been emptied while handling overflow"); 214 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted"); 215 // Clear expansion flag 216 _should_expand = false; 217 if (_capacity == (jint) MarkStackSizeMax) { 218 if (PrintGCDetails && Verbose) { 219 gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit"); 220 } 221 return; 222 } 223 // Double capacity if possible 224 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax); 225 // Do not give up existing stack until we have managed to 226 // get the double capacity that we desired. 227 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity * 228 sizeof(oop))); 229 if (rs.is_reserved()) { 230 // Release the backing store associated with old stack 231 _virtual_space.release(); 232 // Reinitialize virtual space for new stack 233 if (!_virtual_space.initialize(rs, rs.size())) { 234 fatal("Not enough swap for expanded marking stack capacity"); 235 } 236 _base = (oop*)(_virtual_space.low()); 237 _index = 0; 238 _capacity = new_capacity; 239 } else { 240 if (PrintGCDetails && Verbose) { 241 // Failed to double capacity, continue; 242 gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from " 243 SIZE_FORMAT"K to " SIZE_FORMAT"K", 244 _capacity / K, new_capacity / K); 245 } 246 } 247 } 248 249 void CMMarkStack::set_should_expand() { 250 // If we're resetting the marking state because of an 251 // marking stack overflow, record that we should, if 252 // possible, expand the stack. 253 _should_expand = _cm->has_overflown(); 254 } 255 256 CMMarkStack::~CMMarkStack() { 257 if (_base != NULL) { 258 _base = NULL; 259 _virtual_space.release(); 260 } 261 } 262 263 void CMMarkStack::par_push(oop ptr) { 264 while (true) { 265 if (isFull()) { 266 _overflow = true; 267 return; 268 } 269 // Otherwise... 
270 jint index = _index; 271 jint next_index = index+1; 272 jint res = Atomic::cmpxchg(next_index, &_index, index); 273 if (res == index) { 274 _base[index] = ptr; 275 // Note that we don't maintain this atomically. We could, but it 276 // doesn't seem necessary. 277 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 278 return; 279 } 280 // Otherwise, we need to try again. 281 } 282 } 283 284 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { 285 while (true) { 286 if (isFull()) { 287 _overflow = true; 288 return; 289 } 290 // Otherwise... 291 jint index = _index; 292 jint next_index = index + n; 293 if (next_index > _capacity) { 294 _overflow = true; 295 return; 296 } 297 jint res = Atomic::cmpxchg(next_index, &_index, index); 298 if (res == index) { 299 for (int i = 0; i < n; i++) { 300 int ind = index + i; 301 assert(ind < _capacity, "By overflow test above."); 302 _base[ind] = ptr_arr[i]; 303 } 304 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 305 return; 306 } 307 // Otherwise, we need to try again. 308 } 309 } 310 311 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { 312 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 313 jint start = _index; 314 jint next_index = start + n; 315 if (next_index > _capacity) { 316 _overflow = true; 317 return; 318 } 319 // Otherwise. 
320 _index = next_index; 321 for (int i = 0; i < n; i++) { 322 int ind = start + i; 323 assert(ind < _capacity, "By overflow test above."); 324 _base[ind] = ptr_arr[i]; 325 } 326 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 327 } 328 329 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { 330 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 331 jint index = _index; 332 if (index == 0) { 333 *n = 0; 334 return false; 335 } else { 336 int k = MIN2(max, index); 337 jint new_ind = index - k; 338 for (int j = 0; j < k; j++) { 339 ptr_arr[j] = _base[new_ind + j]; 340 } 341 _index = new_ind; 342 *n = k; 343 return true; 344 } 345 } 346 347 template<class OopClosureClass> 348 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) { 349 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after 350 || SafepointSynchronize::is_at_safepoint(), 351 "Drain recursion must be yield-safe."); 352 bool res = true; 353 debug_only(_drain_in_progress = true); 354 debug_only(_drain_in_progress_yields = yield_after); 355 while (!isEmpty()) { 356 oop newOop = pop(); 357 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop"); 358 assert(newOop->is_oop(), "Expected an oop"); 359 assert(bm == NULL || bm->isMarked((HeapWord*)newOop), 360 "only grey objects on this stack"); 361 newOop->oop_iterate(cl); 362 if (yield_after && _cm->do_yield_check()) { 363 res = false; 364 break; 365 } 366 } 367 debug_only(_drain_in_progress = false); 368 return res; 369 } 370 371 void CMMarkStack::note_start_of_gc() { 372 assert(_saved_index == -1, 373 "note_start_of_gc()/end_of_gc() bracketed incorrectly"); 374 _saved_index = _index; 375 } 376 377 void CMMarkStack::note_end_of_gc() { 378 // This is intentionally a guarantee, instead of an assert. If we 379 // accidentally add something to the mark stack during GC, it 380 // will be a correctness issue so it's better if we crash. 
we'll 381 // only check this once per GC anyway, so it won't be a performance 382 // issue in any way. 383 guarantee(_saved_index == _index, 384 err_msg("saved index: %d index: %d", _saved_index, _index)); 385 _saved_index = -1; 386 } 387 388 void CMMarkStack::oops_do(OopClosure* f) { 389 assert(_saved_index == _index, 390 err_msg("saved index: %d index: %d", _saved_index, _index)); 391 for (int i = 0; i < _index; i += 1) { 392 f->do_oop(&_base[i]); 393 } 394 } 395 396 bool ConcurrentMark::not_yet_marked(oop obj) const { 397 return _g1h->is_obj_ill(obj); 398 } 399 400 CMRootRegions::CMRootRegions() : 401 _young_list(NULL), _cm(NULL), _scan_in_progress(false), 402 _should_abort(false), _next_survivor(NULL) { } 403 404 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) { 405 _young_list = g1h->young_list(); 406 _cm = cm; 407 } 408 409 void CMRootRegions::prepare_for_scan() { 410 assert(!scan_in_progress(), "pre-condition"); 411 412 // Currently, only survivors can be root regions. 413 assert(_next_survivor == NULL, "pre-condition"); 414 _next_survivor = _young_list->first_survivor_region(); 415 _scan_in_progress = (_next_survivor != NULL); 416 _should_abort = false; 417 } 418 419 HeapRegion* CMRootRegions::claim_next() { 420 if (_should_abort) { 421 // If someone has set the should_abort flag, we return NULL to 422 // force the caller to bail out of their loop. 423 return NULL; 424 } 425 426 // Currently, only survivors can be root regions. 427 HeapRegion* res = _next_survivor; 428 if (res != NULL) { 429 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 430 // Read it again in case it changed while we were waiting for the lock. 431 res = _next_survivor; 432 if (res != NULL) { 433 if (res == _young_list->last_survivor_region()) { 434 // We just claimed the last survivor so store NULL to indicate 435 // that we're done. 
436 _next_survivor = NULL; 437 } else { 438 _next_survivor = res->get_next_young_region(); 439 } 440 } else { 441 // Someone else claimed the last survivor while we were trying 442 // to take the lock so nothing else to do. 443 } 444 } 445 assert(res == NULL || res->is_survivor(), "post-condition"); 446 447 return res; 448 } 449 450 void CMRootRegions::scan_finished() { 451 assert(scan_in_progress(), "pre-condition"); 452 453 // Currently, only survivors can be root regions. 454 if (!_should_abort) { 455 assert(_next_survivor == NULL, "we should have claimed all survivors"); 456 } 457 _next_survivor = NULL; 458 459 { 460 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 461 _scan_in_progress = false; 462 RootRegionScan_lock->notify_all(); 463 } 464 } 465 466 bool CMRootRegions::wait_until_scan_finished() { 467 if (!scan_in_progress()) return false; 468 469 { 470 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 471 while (scan_in_progress()) { 472 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 473 } 474 } 475 return true; 476 } 477 478 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away 479 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list 480 #endif // _MSC_VER 481 482 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 483 return MAX2((n_par_threads + 2) / 4, 1U); 484 } 485 486 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) : 487 _g1h(g1h), 488 _markBitMap1(log2_intptr(MinObjAlignment)), 489 _markBitMap2(log2_intptr(MinObjAlignment)), 490 _parallel_marking_threads(0), 491 _max_parallel_marking_threads(0), 492 _sleep_factor(0.0), 493 _marking_task_overhead(1.0), 494 _cleanup_sleep_factor(0.0), 495 _cleanup_task_overhead(1.0), 496 _cleanup_list("Cleanup List"), 497 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/), 498 _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >> 499 
CardTableModRefBS::card_shift, 500 false /* in_resource_area*/), 501 502 _prevMarkBitMap(&_markBitMap1), 503 _nextMarkBitMap(&_markBitMap2), 504 505 _markStack(this), 506 // _finger set in set_non_marking_state 507 508 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)), 509 // _active_tasks set in set_non_marking_state 510 // _tasks set inside the constructor 511 _task_queues(new CMTaskQueueSet((int) _max_worker_id)), 512 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)), 513 514 _has_overflown(false), 515 _concurrent(false), 516 _has_aborted(false), 517 _aborted_gc_id(GCId::undefined()), 518 _restart_for_overflow(false), 519 _concurrent_marking_in_progress(false), 520 521 // _verbose_level set below 522 523 _init_times(), 524 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 525 _cleanup_times(), 526 _total_counting_time(0.0), 527 _total_rs_scrub_time(0.0), 528 529 _parallel_workers(NULL), 530 531 _count_card_bitmaps(NULL), 532 _count_marked_bytes(NULL), 533 _completed_initialization(false) { 534 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel; 535 if (verbose_level < no_verbose) { 536 verbose_level = no_verbose; 537 } 538 if (verbose_level > high_verbose) { 539 verbose_level = high_verbose; 540 } 541 _verbose_level = verbose_level; 542 543 if (verbose_low()) { 544 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " 545 "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end)); 546 } 547 548 if (!_markBitMap1.allocate(heap_rs)) { 549 warning("Failed to allocate first CM bit map"); 550 return; 551 } 552 if (!_markBitMap2.allocate(heap_rs)) { 553 warning("Failed to allocate second CM bit map"); 554 return; 555 } 556 557 // Create & start a ConcurrentMark thread. 
558 _cmThread = new ConcurrentMarkThread(this); 559 assert(cmThread() != NULL, "CM Thread should have been created"); 560 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 561 if (_cmThread->osthread() == NULL) { 562 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 563 } 564 565 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 566 assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency"); 567 assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency"); 568 569 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 570 satb_qs.set_buffer_size(G1SATBBufferSize); 571 572 _root_regions.init(_g1h, this); 573 574 if (ConcGCThreads > ParallelGCThreads) { 575 warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") " 576 "than ParallelGCThreads (" UINTX_FORMAT ").", 577 ConcGCThreads, ParallelGCThreads); 578 return; 579 } 580 if (ParallelGCThreads == 0) { 581 // if we are not running with any parallel GC threads we will not 582 // spawn any marking threads either 583 _parallel_marking_threads = 0; 584 _max_parallel_marking_threads = 0; 585 _sleep_factor = 0.0; 586 _marking_task_overhead = 1.0; 587 } else { 588 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) { 589 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent 590 // if both are set 591 _sleep_factor = 0.0; 592 _marking_task_overhead = 1.0; 593 } else if (G1MarkingOverheadPercent > 0) { 594 // We will calculate the number of parallel marking threads based 595 // on a target overhead with respect to the soft real-time goal 596 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 597 double overall_cm_overhead = 598 (double) MaxGCPauseMillis * marking_overhead / 599 (double) GCPauseIntervalMillis; 600 double cpu_ratio = 1.0 / (double) os::processor_count(); 601 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 602 double marking_task_overhead = 603 overall_cm_overhead / marking_thread_num 
* 604 (double) os::processor_count(); 605 double sleep_factor = 606 (1.0 - marking_task_overhead) / marking_task_overhead; 607 608 FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num); 609 _sleep_factor = sleep_factor; 610 _marking_task_overhead = marking_task_overhead; 611 } else { 612 // Calculate the number of parallel marking threads by scaling 613 // the number of parallel GC threads. 614 uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads); 615 FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num); 616 _sleep_factor = 0.0; 617 _marking_task_overhead = 1.0; 618 } 619 620 assert(ConcGCThreads > 0, "Should have been set"); 621 _parallel_marking_threads = (uint) ConcGCThreads; 622 _max_parallel_marking_threads = _parallel_marking_threads; 623 624 if (parallel_marking_threads() > 1) { 625 _cleanup_task_overhead = 1.0; 626 } else { 627 _cleanup_task_overhead = marking_task_overhead(); 628 } 629 _cleanup_sleep_factor = 630 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); 631 632 #if 0 633 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); 634 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); 635 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); 636 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); 637 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); 638 #endif 639 640 guarantee(parallel_marking_threads() > 0, "peace of mind"); 641 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads", 642 _max_parallel_marking_threads, false, true); 643 if (_parallel_workers == NULL) { 644 vm_exit_during_initialization("Failed necessary allocation."); 645 } else { 646 _parallel_workers->initialize_workers(); 647 } 648 } 649 650 if (FLAG_IS_DEFAULT(MarkStackSize)) { 651 uintx mark_stack_size = 652 MIN2(MarkStackSizeMax, 653 MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * 
TASKQUEUE_SIZE))); 654 // Verify that the calculated value for MarkStackSize is in range. 655 // It would be nice to use the private utility routine from Arguments. 656 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 657 warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): " 658 "must be between " UINTX_FORMAT " and " UINTX_FORMAT, 659 mark_stack_size, (uintx) 1, MarkStackSizeMax); 660 return; 661 } 662 FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size); 663 } else { 664 // Verify MarkStackSize is in range. 665 if (FLAG_IS_CMDLINE(MarkStackSize)) { 666 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 667 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 668 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): " 669 "must be between " UINTX_FORMAT " and " UINTX_FORMAT, 670 MarkStackSize, (uintx) 1, MarkStackSizeMax); 671 return; 672 } 673 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 674 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 675 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")" 676 " or for MarkStackSizeMax (" UINTX_FORMAT ")", 677 MarkStackSize, MarkStackSizeMax); 678 return; 679 } 680 } 681 } 682 } 683 684 if (!_markStack.allocate(MarkStackSize)) { 685 warning("Failed to allocate CM marking stack"); 686 return; 687 } 688 689 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC); 690 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); 691 692 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC); 693 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC); 694 695 BitMap::idx_t card_bm_size = _card_bm.size(); 696 697 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 698 _active_tasks = _max_worker_id; 699 700 size_t max_regions = (size_t) _g1h->max_regions(); 701 for (uint i = 0; i < _max_worker_id; ++i) { 702 CMTaskQueue* task_queue = new CMTaskQueue(); 703 
task_queue->initialize(); 704 _task_queues->register_queue(i, task_queue); 705 706 _count_card_bitmaps[i] = BitMap(card_bm_size, false); 707 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); 708 709 _tasks[i] = new CMTask(i, this, 710 _count_marked_bytes[i], 711 &_count_card_bitmaps[i], 712 task_queue, _task_queues); 713 714 _accum_task_vtime[i] = 0.0; 715 } 716 717 // Calculate the card number for the bottom of the heap. Used 718 // in biasing indexes into the accounting card bitmaps. 719 _heap_bottom_card_num = 720 intptr_t(uintptr_t(_g1h->reserved_region().start()) >> 721 CardTableModRefBS::card_shift); 722 723 // Clear all the liveness counting data 724 clear_all_count_data(); 725 726 // so that the call below can read a sensible value 727 _heap_start = (HeapWord*) heap_rs.base(); 728 set_non_marking_state(); 729 _completed_initialization = true; 730 } 731 732 void ConcurrentMark::update_g1_committed(bool force) { 733 // If concurrent marking is not in progress, then we do not need to 734 // update _heap_end. 735 if (!concurrent_marking_in_progress() && !force) return; 736 737 MemRegion committed = _g1h->g1_committed(); 738 assert(committed.start() == _heap_start, "start shouldn't change"); 739 HeapWord* new_end = committed.end(); 740 if (new_end > _heap_end) { 741 // The heap has been expanded. 742 743 _heap_end = new_end; 744 } 745 // Notice that the heap can also shrink. However, this only happens 746 // during a Full GC (at least currently) and the entire marking 747 // phase will bail out and the task will not be restarted. So, let's 748 // do nothing. 749 } 750 751 void ConcurrentMark::reset() { 752 // Starting values for these two. This should be called in a STW 753 // phase. CM will be notified of any future g1_committed expansions 754 // will be at the end of evacuation pauses, when tasks are 755 // inactive. 
756 MemRegion committed = _g1h->g1_committed(); 757 _heap_start = committed.start(); 758 _heap_end = committed.end(); 759 760 // Separated the asserts so that we know which one fires. 761 assert(_heap_start != NULL, "heap bounds should look ok"); 762 assert(_heap_end != NULL, "heap bounds should look ok"); 763 assert(_heap_start < _heap_end, "heap bounds should look ok"); 764 765 // Reset all the marking data structures and any necessary flags 766 reset_marking_state(); 767 768 if (verbose_low()) { 769 gclog_or_tty->print_cr("[global] resetting"); 770 } 771 772 // We do reset all of them, since different phases will use 773 // different number of active threads. So, it's easiest to have all 774 // of them ready. 775 for (uint i = 0; i < _max_worker_id; ++i) { 776 _tasks[i]->reset(_nextMarkBitMap); 777 } 778 779 // we need this to make sure that the flag is on during the evac 780 // pause with initial mark piggy-backed 781 set_concurrent_marking_in_progress(); 782 } 783 784 785 void ConcurrentMark::reset_marking_state(bool clear_overflow) { 786 _markStack.set_should_expand(); 787 _markStack.setEmpty(); // Also clears the _markStack overflow flag 788 if (clear_overflow) { 789 clear_has_overflown(); 790 } else { 791 assert(has_overflown(), "pre-condition"); 792 } 793 _finger = _heap_start; 794 795 for (uint i = 0; i < _max_worker_id; ++i) { 796 CMTaskQueue* queue = _task_queues->queue(i); 797 queue->set_empty(); 798 } 799 } 800 801 void ConcurrentMark::set_concurrency(uint active_tasks) { 802 assert(active_tasks <= _max_worker_id, "we should not have more"); 803 804 _active_tasks = active_tasks; 805 // Need to update the three data structures below according to the 806 // number of active threads for this phase. 
807 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 808 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 809 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 810 } 811 812 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 813 set_concurrency(active_tasks); 814 815 _concurrent = concurrent; 816 // We propagate this to all tasks, not just the active ones. 817 for (uint i = 0; i < _max_worker_id; ++i) 818 _tasks[i]->set_concurrent(concurrent); 819 820 if (concurrent) { 821 set_concurrent_marking_in_progress(); 822 } else { 823 // We currently assume that the concurrent flag has been set to 824 // false before we start remark. At this point we should also be 825 // in a STW phase. 826 assert(!concurrent_marking_in_progress(), "invariant"); 827 assert(out_of_regions(), 828 err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT, 829 p2i(_finger), p2i(_heap_end))); 830 update_g1_committed(true); 831 } 832 } 833 834 void ConcurrentMark::set_non_marking_state() { 835 // We set the global marking state to some default values when we're 836 // not doing marking. 837 reset_marking_state(); 838 _active_tasks = 0; 839 clear_concurrent_marking_in_progress(); 840 } 841 842 ConcurrentMark::~ConcurrentMark() { 843 // The ConcurrentMark instance is never freed. 844 ShouldNotReachHere(); 845 } 846 847 void ConcurrentMark::clearNextBitmap() { 848 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 849 G1CollectorPolicy* g1p = g1h->g1_policy(); 850 851 // Make sure that the concurrent mark thread looks to still be in 852 // the current cycle. 853 guarantee(cmThread()->during_cycle(), "invariant"); 854 855 // We are finishing up the current cycle by clearing the next 856 // marking bitmap and getting it ready for the next cycle. During 857 // this time no other cycle can start. So, let's make sure that this 858 // is the case. 
859 guarantee(!g1h->mark_in_progress(), "invariant"); 860 861 // clear the mark bitmap (no grey objects to start with). 862 // We need to do this in chunks and offer to yield in between 863 // each chunk. 864 HeapWord* start = _nextMarkBitMap->startWord(); 865 HeapWord* end = _nextMarkBitMap->endWord(); 866 HeapWord* cur = start; 867 size_t chunkSize = M; 868 while (cur < end) { 869 HeapWord* next = cur + chunkSize; 870 if (next > end) { 871 next = end; 872 } 873 MemRegion mr(cur,next); 874 _nextMarkBitMap->clearRange(mr); 875 cur = next; 876 do_yield_check(); 877 878 // Repeat the asserts from above. We'll do them as asserts here to 879 // minimize their overhead on the product. However, we'll have 880 // them as guarantees at the beginning / end of the bitmap 881 // clearing to get some checking in the product. 882 assert(cmThread()->during_cycle(), "invariant"); 883 assert(!g1h->mark_in_progress(), "invariant"); 884 } 885 886 // Clear the liveness counting data 887 clear_all_count_data(); 888 889 // Repeat the asserts from above. 890 guarantee(cmThread()->during_cycle(), "invariant"); 891 guarantee(!g1h->mark_in_progress(), "invariant"); 892 } 893 894 class NoteStartOfMarkHRClosure: public HeapRegionClosure { 895 public: 896 bool doHeapRegion(HeapRegion* r) { 897 if (!r->continuesHumongous()) { 898 r->note_start_of_marking(); 899 } 900 return false; 901 } 902 }; 903 904 void ConcurrentMark::checkpointRootsInitialPre() { 905 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 906 G1CollectorPolicy* g1p = g1h->g1_policy(); 907 908 _has_aborted = false; 909 910 #ifndef PRODUCT 911 if (G1PrintReachableAtInitialMark) { 912 print_reachable("at-cycle-start", 913 VerifyOption_G1UsePrevMarking, true /* all */); 914 } 915 #endif 916 917 // Initialize marking structures. This has to be done in a STW phase. 918 reset(); 919 920 // For each region note start of marking. 
921 NoteStartOfMarkHRClosure startcl; 922 g1h->heap_region_iterate(&startcl); 923 } 924 925 926 void ConcurrentMark::checkpointRootsInitialPost() { 927 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 928 929 // If we force an overflow during remark, the remark operation will 930 // actually abort and we'll restart concurrent marking. If we always 931 // force an overflow during remark we'll never actually complete the 932 // marking phase. So, we initialize this here, at the start of the 933 // cycle, so that at the remaining overflow number will decrease at 934 // every remark and we'll eventually not need to cause one. 935 force_overflow_stw()->init(); 936 937 // Start Concurrent Marking weak-reference discovery. 938 ReferenceProcessor* rp = g1h->ref_processor_cm(); 939 // enable ("weak") refs discovery 940 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); 941 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 942 943 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 944 // This is the start of the marking cycle, we're expected all 945 // threads to have SATB queues with active set to false. 946 satb_mq_set.set_active_all_threads(true, /* new active value */ 947 false /* expected_active */); 948 949 _root_regions.prepare_for_scan(); 950 951 // update_g1_committed() will be called at the end of an evac pause 952 // when marking is on. So, it's also called at the end of the 953 // initial-mark pause to update the heap end, if the heap expands 954 // during it. No need to call it here. 955 } 956 957 /* 958 * Notice that in the next two methods, we actually leave the STS 959 * during the barrier sync and join it immediately afterwards. 
If we 960 * do not do this, the following deadlock can occur: one thread could 961 * be in the barrier sync code, waiting for the other thread to also 962 * sync up, whereas another one could be trying to yield, while also 963 * waiting for the other threads to sync up too. 964 * 965 * Note, however, that this code is also used during remark and in 966 * this case we should not attempt to leave / enter the STS, otherwise 967 * we'll either hit an assert (debug / fastdebug) or deadlock 968 * (product). So we should only leave / enter the STS if we are 969 * operating concurrently. 970 * 971 * Because the thread that does the sync barrier has left the STS, it 972 * is possible to be suspended for a Full GC or an evacuation pause 973 * could occur. This is actually safe, since the entering the sync 974 * barrier is one of the last things do_marking_step() does, and it 975 * doesn't manipulate any data structures afterwards. 976 */ 977 978 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 979 if (verbose_low()) { 980 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id); 981 } 982 983 if (concurrent()) { 984 SuspendibleThreadSet::leave(); 985 } 986 987 bool barrier_aborted = !_first_overflow_barrier_sync.enter(); 988 989 if (concurrent()) { 990 SuspendibleThreadSet::join(); 991 } 992 // at this point everyone should have synced up and not be doing any 993 // more work 994 995 if (verbose_low()) { 996 if (barrier_aborted) { 997 gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id); 998 } else { 999 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id); 1000 } 1001 } 1002 1003 if (barrier_aborted) { 1004 // If the barrier aborted we ignore the overflow condition and 1005 // just abort the whole marking phase as quickly as possible. 
1006 return; 1007 } 1008 1009 // If we're executing the concurrent phase of marking, reset the marking 1010 // state; otherwise the marking state is reset after reference processing, 1011 // during the remark pause. 1012 // If we reset here as a result of an overflow during the remark we will 1013 // see assertion failures from any subsequent set_concurrency_and_phase() 1014 // calls. 1015 if (concurrent()) { 1016 // let the task associated with with worker 0 do this 1017 if (worker_id == 0) { 1018 // task 0 is responsible for clearing the global data structures 1019 // We should be here because of an overflow. During STW we should 1020 // not clear the overflow flag since we rely on it being true when 1021 // we exit this method to abort the pause and restart concurrent 1022 // marking. 1023 reset_marking_state(true /* clear_overflow */); 1024 force_overflow()->update(); 1025 1026 if (G1Log::fine()) { 1027 gclog_or_tty->gclog_stamp(concurrent_gc_id()); 1028 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); 1029 } 1030 } 1031 } 1032 1033 // after this, each task should reset its own data structures then 1034 // then go into the second barrier 1035 } 1036 1037 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) { 1038 if (verbose_low()) { 1039 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id); 1040 } 1041 1042 if (concurrent()) { 1043 SuspendibleThreadSet::leave(); 1044 } 1045 1046 bool barrier_aborted = !_second_overflow_barrier_sync.enter(); 1047 1048 if (concurrent()) { 1049 SuspendibleThreadSet::join(); 1050 } 1051 // at this point everything should be re-initialized and ready to go 1052 1053 if (verbose_low()) { 1054 if (barrier_aborted) { 1055 gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id); 1056 } else { 1057 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id); 1058 } 1059 } 1060 } 1061 1062 #ifndef PRODUCT 1063 void ForceOverflowSettings::init() { 1064 _num_remaining = 
G1ConcMarkForceOverflow; 1065 _force = false; 1066 update(); 1067 } 1068 1069 void ForceOverflowSettings::update() { 1070 if (_num_remaining > 0) { 1071 _num_remaining -= 1; 1072 _force = true; 1073 } else { 1074 _force = false; 1075 } 1076 } 1077 1078 bool ForceOverflowSettings::should_force() { 1079 if (_force) { 1080 _force = false; 1081 return true; 1082 } else { 1083 return false; 1084 } 1085 } 1086 #endif // !PRODUCT 1087 1088 class CMConcurrentMarkingTask: public AbstractGangTask { 1089 private: 1090 ConcurrentMark* _cm; 1091 ConcurrentMarkThread* _cmt; 1092 1093 public: 1094 void work(uint worker_id) { 1095 assert(Thread::current()->is_ConcurrentGC_thread(), 1096 "this should only be done by a conc GC thread"); 1097 ResourceMark rm; 1098 1099 double start_vtime = os::elapsedVTime(); 1100 1101 SuspendibleThreadSet::join(); 1102 1103 assert(worker_id < _cm->active_tasks(), "invariant"); 1104 CMTask* the_task = _cm->task(worker_id); 1105 the_task->record_start_time(); 1106 if (!_cm->has_aborted()) { 1107 do { 1108 double start_vtime_sec = os::elapsedVTime(); 1109 double start_time_sec = os::elapsedTime(); 1110 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1111 1112 the_task->do_marking_step(mark_step_duration_ms, 1113 true /* do_termination */, 1114 false /* is_serial*/); 1115 1116 double end_time_sec = os::elapsedTime(); 1117 double end_vtime_sec = os::elapsedVTime(); 1118 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 1119 double elapsed_time_sec = end_time_sec - start_time_sec; 1120 _cm->clear_has_overflown(); 1121 1122 bool ret = _cm->do_yield_check(worker_id); 1123 1124 jlong sleep_time_ms; 1125 if (!_cm->has_aborted() && the_task->has_aborted()) { 1126 sleep_time_ms = 1127 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 1128 SuspendibleThreadSet::leave(); 1129 os::sleep(Thread::current(), sleep_time_ms, false); 1130 SuspendibleThreadSet::join(); 1131 } 1132 double end_time2_sec = os::elapsedTime(); 1133 double 
elapsed_time2_sec = end_time2_sec - start_time_sec; 1134 1135 #if 0 1136 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " 1137 "overhead %1.4lf", 1138 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, 1139 the_task->conc_overhead(os::elapsedTime()) * 8.0); 1140 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", 1141 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 1142 #endif 1143 } while (!_cm->has_aborted() && the_task->has_aborted()); 1144 } 1145 the_task->record_end_time(); 1146 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 1147 1148 SuspendibleThreadSet::leave(); 1149 1150 double end_vtime = os::elapsedVTime(); 1151 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 1152 } 1153 1154 CMConcurrentMarkingTask(ConcurrentMark* cm, 1155 ConcurrentMarkThread* cmt) : 1156 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 1157 1158 ~CMConcurrentMarkingTask() { } 1159 }; 1160 1161 // Calculates the number of active workers for a concurrent 1162 // phase. 1163 uint ConcurrentMark::calc_parallel_marking_threads() { 1164 if (G1CollectedHeap::use_parallel_gc_threads()) { 1165 uint n_conc_workers = 0; 1166 if (!UseDynamicNumberOfGCThreads || 1167 (!FLAG_IS_DEFAULT(ConcGCThreads) && 1168 !ForceDynamicNumberOfGCThreads)) { 1169 n_conc_workers = max_parallel_marking_threads(); 1170 } else { 1171 n_conc_workers = 1172 AdaptiveSizePolicy::calc_default_active_workers( 1173 max_parallel_marking_threads(), 1174 1, /* Minimum workers */ 1175 parallel_marking_threads(), 1176 Threads::number_of_non_daemon_threads()); 1177 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 1178 // that scaling has already gone into "_max_parallel_marking_threads". 1179 } 1180 assert(n_conc_workers > 0, "Always need at least 1"); 1181 return n_conc_workers; 1182 } 1183 // If we are not running with any parallel GC threads we will not 1184 // have spawned any marking threads either. 
Hence the number of 1185 // concurrent workers should be 0. 1186 return 0; 1187 } 1188 1189 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) { 1190 // Currently, only survivors can be root regions. 1191 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 1192 G1RootRegionScanClosure cl(_g1h, this, worker_id); 1193 1194 const uintx interval = PrefetchScanIntervalInBytes; 1195 HeapWord* curr = hr->bottom(); 1196 const HeapWord* end = hr->top(); 1197 while (curr < end) { 1198 Prefetch::read(curr, interval); 1199 oop obj = oop(curr); 1200 int size = obj->oop_iterate(&cl); 1201 assert(size == obj->size(), "sanity"); 1202 curr += size; 1203 } 1204 } 1205 1206 class CMRootRegionScanTask : public AbstractGangTask { 1207 private: 1208 ConcurrentMark* _cm; 1209 1210 public: 1211 CMRootRegionScanTask(ConcurrentMark* cm) : 1212 AbstractGangTask("Root Region Scan"), _cm(cm) { } 1213 1214 void work(uint worker_id) { 1215 assert(Thread::current()->is_ConcurrentGC_thread(), 1216 "this should only be done by a conc GC thread"); 1217 1218 CMRootRegions* root_regions = _cm->root_regions(); 1219 HeapRegion* hr = root_regions->claim_next(); 1220 while (hr != NULL) { 1221 _cm->scanRootRegion(hr, worker_id); 1222 hr = root_regions->claim_next(); 1223 } 1224 } 1225 }; 1226 1227 void ConcurrentMark::scanRootRegions() { 1228 // Start of concurrent marking. 1229 ClassLoaderDataGraph::clear_claimed_marks(); 1230 1231 // scan_in_progress() will have been set to true only if there was 1232 // at least one root region to scan. So, if it's false, we 1233 // should not attempt to do any further work. 
1234 if (root_regions()->scan_in_progress()) { 1235 _parallel_marking_threads = calc_parallel_marking_threads(); 1236 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1237 "Maximum number of marking threads exceeded"); 1238 uint active_workers = MAX2(1U, parallel_marking_threads()); 1239 1240 CMRootRegionScanTask task(this); 1241 if (use_parallel_marking_threads()) { 1242 _parallel_workers->set_active_workers((int) active_workers); 1243 _parallel_workers->run_task(&task); 1244 } else { 1245 task.work(0); 1246 } 1247 1248 // It's possible that has_aborted() is true here without actually 1249 // aborting the survivor scan earlier. This is OK as it's 1250 // mainly used for sanity checking. 1251 root_regions()->scan_finished(); 1252 } 1253 } 1254 1255 void ConcurrentMark::markFromRoots() { 1256 // we might be tempted to assert that: 1257 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1258 // "inconsistent argument?"); 1259 // However that wouldn't be right, because it's possible that 1260 // a safepoint is indeed in progress as a younger generation 1261 // stop-the-world GC happens even as we mark in this generation. 
1262 1263 _restart_for_overflow = false; 1264 force_overflow_conc()->init(); 1265 1266 // _g1h has _n_par_threads 1267 _parallel_marking_threads = calc_parallel_marking_threads(); 1268 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1269 "Maximum number of marking threads exceeded"); 1270 1271 uint active_workers = MAX2(1U, parallel_marking_threads()); 1272 1273 // Parallel task terminator is set in "set_concurrency_and_phase()" 1274 set_concurrency_and_phase(active_workers, true /* concurrent */); 1275 1276 CMConcurrentMarkingTask markingTask(this, cmThread()); 1277 if (use_parallel_marking_threads()) { 1278 _parallel_workers->set_active_workers((int)active_workers); 1279 // Don't set _n_par_threads because it affects MT in process_roots() 1280 // and the decisions on that MT processing is made elsewhere. 1281 assert(_parallel_workers->active_workers() > 0, "Should have been set"); 1282 _parallel_workers->run_task(&markingTask); 1283 } else { 1284 markingTask.work(0); 1285 } 1286 print_stats(); 1287 } 1288 1289 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1290 // world is stopped at this checkpoint 1291 assert(SafepointSynchronize::is_at_safepoint(), 1292 "world should be stopped"); 1293 1294 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1295 1296 // If a full collection has happened, we shouldn't do this. 
1297 if (has_aborted()) { 1298 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1299 return; 1300 } 1301 1302 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1303 1304 if (VerifyDuringGC) { 1305 HandleMark hm; // handle scope 1306 Universe::heap()->prepare_for_verify(); 1307 Universe::verify(VerifyOption_G1UsePrevMarking, 1308 " VerifyDuringGC:(before)"); 1309 } 1310 g1h->check_bitmaps("Remark Start"); 1311 1312 G1CollectorPolicy* g1p = g1h->g1_policy(); 1313 g1p->record_concurrent_mark_remark_start(); 1314 1315 double start = os::elapsedTime(); 1316 1317 checkpointRootsFinalWork(); 1318 1319 double mark_work_end = os::elapsedTime(); 1320 1321 weakRefsWork(clear_all_soft_refs); 1322 1323 if (has_overflown()) { 1324 // Oops. We overflowed. Restart concurrent marking. 1325 _restart_for_overflow = true; 1326 if (G1TraceMarkStackOverflow) { 1327 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1328 } 1329 1330 // Verify the heap w.r.t. the previous marking bitmap. 1331 if (VerifyDuringGC) { 1332 HandleMark hm; // handle scope 1333 Universe::heap()->prepare_for_verify(); 1334 Universe::verify(VerifyOption_G1UsePrevMarking, 1335 " VerifyDuringGC:(overflow)"); 1336 } 1337 1338 // Clear the marking state because we will be restarting 1339 // marking due to overflowing the global mark stack. 1340 reset_marking_state(); 1341 } else { 1342 // Aggregate the per-task counting data that we have accumulated 1343 // while marking. 1344 aggregate_count_data(); 1345 1346 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1347 // We're done with marking. 1348 // This is the end of the marking cycle, we're expected all 1349 // threads to have SATB queues with active set to true. 
1350 satb_mq_set.set_active_all_threads(false, /* new active value */ 1351 true /* expected_active */); 1352 1353 if (VerifyDuringGC) { 1354 HandleMark hm; // handle scope 1355 Universe::heap()->prepare_for_verify(); 1356 Universe::verify(VerifyOption_G1UseNextMarking, 1357 " VerifyDuringGC:(after)"); 1358 } 1359 g1h->check_bitmaps("Remark End"); 1360 assert(!restart_for_overflow(), "sanity"); 1361 // Completely reset the marking state since marking completed 1362 set_non_marking_state(); 1363 } 1364 1365 // Expand the marking stack, if we have to and if we can. 1366 if (_markStack.should_expand()) { 1367 _markStack.expand(); 1368 } 1369 1370 // Statistics 1371 double now = os::elapsedTime(); 1372 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1373 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1374 _remark_times.add((now - start) * 1000.0); 1375 1376 g1p->record_concurrent_mark_remark_end(); 1377 1378 G1CMIsAliveClosure is_alive(g1h); 1379 g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive); 1380 } 1381 1382 // Base class of the closures that finalize and verify the 1383 // liveness counting data. 1384 class CMCountDataClosureBase: public HeapRegionClosure { 1385 protected: 1386 G1CollectedHeap* _g1h; 1387 ConcurrentMark* _cm; 1388 CardTableModRefBS* _ct_bs; 1389 1390 BitMap* _region_bm; 1391 BitMap* _card_bm; 1392 1393 // Takes a region that's not empty (i.e., it has at least one 1394 // live object in it and sets its corresponding bit on the region 1395 // bitmap to 1. If the region is "starts humongous" it will also set 1396 // to 1 the bits on the region bitmap that correspond to its 1397 // associated "continues humongous" regions. 1398 void set_bit_for_region(HeapRegion* hr) { 1399 assert(!hr->continuesHumongous(), "should have filtered those out"); 1400 1401 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1402 if (!hr->startsHumongous()) { 1403 // Normal (non-humongous) case: just set the bit. 
1404 _region_bm->par_at_put(index, true); 1405 } else { 1406 // Starts humongous case: calculate how many regions are part of 1407 // this humongous region and then set the bit range. 1408 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index(); 1409 _region_bm->par_at_put_range(index, end_index, true); 1410 } 1411 } 1412 1413 public: 1414 CMCountDataClosureBase(G1CollectedHeap* g1h, 1415 BitMap* region_bm, BitMap* card_bm): 1416 _g1h(g1h), _cm(g1h->concurrent_mark()), 1417 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 1418 _region_bm(region_bm), _card_bm(card_bm) { } 1419 }; 1420 1421 // Closure that calculates the # live objects per region. Used 1422 // for verification purposes during the cleanup pause. 1423 class CalcLiveObjectsClosure: public CMCountDataClosureBase { 1424 CMBitMapRO* _bm; 1425 size_t _region_marked_bytes; 1426 1427 public: 1428 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h, 1429 BitMap* region_bm, BitMap* card_bm) : 1430 CMCountDataClosureBase(g1h, region_bm, card_bm), 1431 _bm(bm), _region_marked_bytes(0) { } 1432 1433 bool doHeapRegion(HeapRegion* hr) { 1434 1435 if (hr->continuesHumongous()) { 1436 // We will ignore these here and process them when their 1437 // associated "starts humongous" region is processed (see 1438 // set_bit_for_heap_region()). Note that we cannot rely on their 1439 // associated "starts humongous" region to have their bit set to 1440 // 1 since, due to the region chunking in the parallel region 1441 // iteration, a "continues humongous" region might be visited 1442 // before its associated "starts humongous". 
1443 return false; 1444 } 1445 1446 HeapWord* ntams = hr->next_top_at_mark_start(); 1447 HeapWord* start = hr->bottom(); 1448 1449 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(), 1450 err_msg("Preconditions not met - " 1451 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT, 1452 p2i(start), p2i(ntams), p2i(hr->end()))); 1453 1454 // Find the first marked object at or after "start". 1455 start = _bm->getNextMarkedWordAddress(start, ntams); 1456 1457 size_t marked_bytes = 0; 1458 1459 while (start < ntams) { 1460 oop obj = oop(start); 1461 int obj_sz = obj->size(); 1462 HeapWord* obj_end = start + obj_sz; 1463 1464 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 1465 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end); 1466 1467 // Note: if we're looking at the last region in heap - obj_end 1468 // could be actually just beyond the end of the heap; end_idx 1469 // will then correspond to a (non-existent) card that is also 1470 // just beyond the heap. 1471 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) { 1472 // end of object is not card aligned - increment to cover 1473 // all the cards spanned by the object 1474 end_idx += 1; 1475 } 1476 1477 // Set the bits in the card BM for the cards spanned by this object. 1478 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1479 1480 // Add the size of this object to the number of marked bytes. 1481 marked_bytes += (size_t)obj_sz * HeapWordSize; 1482 1483 // Find the next marked object after this one. 1484 start = _bm->getNextMarkedWordAddress(obj_end, ntams); 1485 } 1486 1487 // Mark the allocated-since-marking portion... 
1488 HeapWord* top = hr->top(); 1489 if (ntams < top) { 1490 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1491 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1492 1493 // Note: if we're looking at the last region in heap - top 1494 // could be actually just beyond the end of the heap; end_idx 1495 // will then correspond to a (non-existent) card that is also 1496 // just beyond the heap. 1497 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1498 // end of object is not card aligned - increment to cover 1499 // all the cards spanned by the object 1500 end_idx += 1; 1501 } 1502 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1503 1504 // This definitely means the region has live objects. 1505 set_bit_for_region(hr); 1506 } 1507 1508 // Update the live region bitmap. 1509 if (marked_bytes > 0) { 1510 set_bit_for_region(hr); 1511 } 1512 1513 // Set the marked bytes for the current region so that 1514 // it can be queried by a calling verification routine 1515 _region_marked_bytes = marked_bytes; 1516 1517 return false; 1518 } 1519 1520 size_t region_marked_bytes() const { return _region_marked_bytes; } 1521 }; 1522 1523 // Heap region closure used for verifying the counting data 1524 // that was accumulated concurrently and aggregated during 1525 // the remark pause. This closure is applied to the heap 1526 // regions during the STW cleanup pause. 1527 1528 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { 1529 G1CollectedHeap* _g1h; 1530 ConcurrentMark* _cm; 1531 CalcLiveObjectsClosure _calc_cl; 1532 BitMap* _region_bm; // Region BM to be verified 1533 BitMap* _card_bm; // Card BM to be verified 1534 bool _verbose; // verbose output? 
1535 1536 BitMap* _exp_region_bm; // Expected Region BM values 1537 BitMap* _exp_card_bm; // Expected card BM values 1538 1539 int _failures; 1540 1541 public: 1542 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h, 1543 BitMap* region_bm, 1544 BitMap* card_bm, 1545 BitMap* exp_region_bm, 1546 BitMap* exp_card_bm, 1547 bool verbose) : 1548 _g1h(g1h), _cm(g1h->concurrent_mark()), 1549 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm), 1550 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose), 1551 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), 1552 _failures(0) { } 1553 1554 int failures() const { return _failures; } 1555 1556 bool doHeapRegion(HeapRegion* hr) { 1557 if (hr->continuesHumongous()) { 1558 // We will ignore these here and process them when their 1559 // associated "starts humongous" region is processed (see 1560 // set_bit_for_heap_region()). Note that we cannot rely on their 1561 // associated "starts humongous" region to have their bit set to 1562 // 1 since, due to the region chunking in the parallel region 1563 // iteration, a "continues humongous" region might be visited 1564 // before its associated "starts humongous". 1565 return false; 1566 } 1567 1568 int failures = 0; 1569 1570 // Call the CalcLiveObjectsClosure to walk the marking bitmap for 1571 // this region and set the corresponding bits in the expected region 1572 // and card bitmaps. 1573 bool res = _calc_cl.doHeapRegion(hr); 1574 assert(res == false, "should be continuing"); 1575 1576 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL), 1577 Mutex::_no_safepoint_check_flag); 1578 1579 // Verify the marked bytes for this region. 1580 size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); 1581 size_t act_marked_bytes = hr->next_marked_bytes(); 1582 1583 // We're not OK if expected marked bytes > actual marked bytes. It means 1584 // we have missed accounting some objects during the actual marking. 
1585 if (exp_marked_bytes > act_marked_bytes) { 1586 if (_verbose) { 1587 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: " 1588 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT, 1589 hr->hrs_index(), exp_marked_bytes, act_marked_bytes); 1590 } 1591 failures += 1; 1592 } 1593 1594 // Verify the bit, for this region, in the actual and expected 1595 // (which was just calculated) region bit maps. 1596 // We're not OK if the bit in the calculated expected region 1597 // bitmap is set and the bit in the actual region bitmap is not. 1598 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1599 1600 bool expected = _exp_region_bm->at(index); 1601 bool actual = _region_bm->at(index); 1602 if (expected && !actual) { 1603 if (_verbose) { 1604 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1605 "expected: %s, actual: %s", 1606 hr->hrs_index(), 1607 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1608 } 1609 failures += 1; 1610 } 1611 1612 // Verify that the card bit maps for the cards spanned by the current 1613 // region match. We have an error if we have a set bit in the expected 1614 // bit map and the corresponding bit in the actual bitmap is not set. 
1615 1616 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1617 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1618 1619 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1620 expected = _exp_card_bm->at(i); 1621 actual = _card_bm->at(i); 1622 1623 if (expected && !actual) { 1624 if (_verbose) { 1625 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1626 "expected: %s, actual: %s", 1627 hr->hrs_index(), i, 1628 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1629 } 1630 failures += 1; 1631 } 1632 } 1633 1634 if (failures > 0 && _verbose) { 1635 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1636 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1637 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), 1638 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1639 } 1640 1641 _failures += failures; 1642 1643 // We could stop iteration over the heap when we 1644 // find the first violating region by returning true. 
1645 return false; 1646 } 1647 }; 1648 1649 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1650 protected: 1651 G1CollectedHeap* _g1h; 1652 ConcurrentMark* _cm; 1653 BitMap* _actual_region_bm; 1654 BitMap* _actual_card_bm; 1655 1656 uint _n_workers; 1657 1658 BitMap* _expected_region_bm; 1659 BitMap* _expected_card_bm; 1660 1661 int _failures; 1662 bool _verbose; 1663 1664 public: 1665 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1666 BitMap* region_bm, BitMap* card_bm, 1667 BitMap* expected_region_bm, BitMap* expected_card_bm) 1668 : AbstractGangTask("G1 verify final counting"), 1669 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1670 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1671 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1672 _failures(0), _verbose(false), 1673 _n_workers(0) { 1674 assert(VerifyDuringGC, "don't call this otherwise"); 1675 1676 // Use the value already set as the number of active threads 1677 // in the call to run_task(). 
1678 if (G1CollectedHeap::use_parallel_gc_threads()) { 1679 assert( _g1h->workers()->active_workers() > 0, 1680 "Should have been previously set"); 1681 _n_workers = _g1h->workers()->active_workers(); 1682 } else { 1683 _n_workers = 1; 1684 } 1685 1686 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1687 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1688 1689 _verbose = _cm->verbose_medium(); 1690 } 1691 1692 void work(uint worker_id) { 1693 assert(worker_id < _n_workers, "invariant"); 1694 1695 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1696 _actual_region_bm, _actual_card_bm, 1697 _expected_region_bm, 1698 _expected_card_bm, 1699 _verbose); 1700 1701 if (G1CollectedHeap::use_parallel_gc_threads()) { 1702 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1703 worker_id, 1704 _n_workers, 1705 HeapRegion::VerifyCountClaimValue); 1706 } else { 1707 _g1h->heap_region_iterate(&verify_cl); 1708 } 1709 1710 Atomic::add(verify_cl.failures(), &_failures); 1711 } 1712 1713 int failures() const { return _failures; } 1714 }; 1715 1716 // Closure that finalizes the liveness counting data. 1717 // Used during the cleanup pause. 1718 // Sets the bits corresponding to the interval [NTAMS, top] 1719 // (which contains the implicitly live objects) in the 1720 // card liveness bitmap. Also sets the bit for each region, 1721 // containing live data, in the region liveness bitmap. 1722 1723 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1724 public: 1725 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1726 BitMap* region_bm, 1727 BitMap* card_bm) : 1728 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1729 1730 bool doHeapRegion(HeapRegion* hr) { 1731 1732 if (hr->continuesHumongous()) { 1733 // We will ignore these here and process them when their 1734 // associated "starts humongous" region is processed (see 1735 // set_bit_for_heap_region()). 
Note that we cannot rely on their 1736 // associated "starts humongous" region to have their bit set to 1737 // 1 since, due to the region chunking in the parallel region 1738 // iteration, a "continues humongous" region might be visited 1739 // before its associated "starts humongous". 1740 return false; 1741 } 1742 1743 HeapWord* ntams = hr->next_top_at_mark_start(); 1744 HeapWord* top = hr->top(); 1745 1746 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1747 1748 // Mark the allocated-since-marking portion... 1749 if (ntams < top) { 1750 // This definitely means the region has live objects. 1751 set_bit_for_region(hr); 1752 1753 // Now set the bits in the card bitmap for [ntams, top) 1754 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1755 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1756 1757 // Note: if we're looking at the last region in heap - top 1758 // could be actually just beyond the end of the heap; end_idx 1759 // will then correspond to a (non-existent) card that is also 1760 // just beyond the heap. 
1761 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1762 // end of object is not card aligned - increment to cover 1763 // all the cards spanned by the object 1764 end_idx += 1; 1765 } 1766 1767 assert(end_idx <= _card_bm->size(), 1768 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1769 end_idx, _card_bm->size())); 1770 assert(start_idx < _card_bm->size(), 1771 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1772 start_idx, _card_bm->size())); 1773 1774 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1775 } 1776 1777 // Set the bit for the region if it contains live data 1778 if (hr->next_marked_bytes() > 0) { 1779 set_bit_for_region(hr); 1780 } 1781 1782 return false; 1783 } 1784 }; 1785 1786 class G1ParFinalCountTask: public AbstractGangTask { 1787 protected: 1788 G1CollectedHeap* _g1h; 1789 ConcurrentMark* _cm; 1790 BitMap* _actual_region_bm; 1791 BitMap* _actual_card_bm; 1792 1793 uint _n_workers; 1794 1795 public: 1796 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1797 : AbstractGangTask("G1 final counting"), 1798 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1799 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1800 _n_workers(0) { 1801 // Use the value already set as the number of active threads 1802 // in the call to run_task(). 
1803 if (G1CollectedHeap::use_parallel_gc_threads()) { 1804 assert( _g1h->workers()->active_workers() > 0, 1805 "Should have been previously set"); 1806 _n_workers = _g1h->workers()->active_workers(); 1807 } else { 1808 _n_workers = 1; 1809 } 1810 } 1811 1812 void work(uint worker_id) { 1813 assert(worker_id < _n_workers, "invariant"); 1814 1815 FinalCountDataUpdateClosure final_update_cl(_g1h, 1816 _actual_region_bm, 1817 _actual_card_bm); 1818 1819 if (G1CollectedHeap::use_parallel_gc_threads()) { 1820 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1821 worker_id, 1822 _n_workers, 1823 HeapRegion::FinalCountClaimValue); 1824 } else { 1825 _g1h->heap_region_iterate(&final_update_cl); 1826 } 1827 } 1828 }; 1829 1830 class G1ParNoteEndTask; 1831 1832 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1833 G1CollectedHeap* _g1; 1834 size_t _max_live_bytes; 1835 uint _regions_claimed; 1836 size_t _freed_bytes; 1837 FreeRegionList* _local_cleanup_list; 1838 HeapRegionSetCount _old_regions_removed; 1839 HeapRegionSetCount _humongous_regions_removed; 1840 HRRSCleanupTask* _hrrs_cleanup_task; 1841 double _claimed_region_time; 1842 double _max_region_time; 1843 1844 public: 1845 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1846 FreeRegionList* local_cleanup_list, 1847 HRRSCleanupTask* hrrs_cleanup_task) : 1848 _g1(g1), 1849 _max_live_bytes(0), _regions_claimed(0), 1850 _freed_bytes(0), 1851 _claimed_region_time(0.0), _max_region_time(0.0), 1852 _local_cleanup_list(local_cleanup_list), 1853 _old_regions_removed(), 1854 _humongous_regions_removed(), 1855 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1856 1857 size_t freed_bytes() { return _freed_bytes; } 1858 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1859 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1860 1861 bool doHeapRegion(HeapRegion *hr) { 1862 if (hr->continuesHumongous()) { 1863 return false; 1864 } 1865 // We use 
a claim value of zero here because all regions 1866 // were claimed with value 1 in the FinalCount task. 1867 _g1->reset_gc_time_stamps(hr); 1868 double start = os::elapsedTime(); 1869 _regions_claimed++; 1870 hr->note_end_of_marking(); 1871 _max_live_bytes += hr->max_live_bytes(); 1872 1873 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1874 _freed_bytes += hr->used(); 1875 hr->set_containing_set(NULL); 1876 if (hr->isHumongous()) { 1877 assert(hr->startsHumongous(), "we should only see starts humongous"); 1878 _humongous_regions_removed.increment(1u, hr->capacity()); 1879 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1880 } else { 1881 _old_regions_removed.increment(1u, hr->capacity()); 1882 _g1->free_region(hr, _local_cleanup_list, true); 1883 } 1884 } else { 1885 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1886 } 1887 1888 double region_time = (os::elapsedTime() - start); 1889 _claimed_region_time += region_time; 1890 if (region_time > _max_region_time) { 1891 _max_region_time = region_time; 1892 } 1893 return false; 1894 } 1895 1896 size_t max_live_bytes() { return _max_live_bytes; } 1897 uint regions_claimed() { return _regions_claimed; } 1898 double claimed_region_time_sec() { return _claimed_region_time; } 1899 double max_region_time_sec() { return _max_region_time; } 1900 }; 1901 1902 class G1ParNoteEndTask: public AbstractGangTask { 1903 friend class G1NoteEndOfConcMarkClosure; 1904 1905 protected: 1906 G1CollectedHeap* _g1h; 1907 size_t _max_live_bytes; 1908 size_t _freed_bytes; 1909 FreeRegionList* _cleanup_list; 1910 1911 public: 1912 G1ParNoteEndTask(G1CollectedHeap* g1h, 1913 FreeRegionList* cleanup_list) : 1914 AbstractGangTask("G1 note end"), _g1h(g1h), 1915 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1916 1917 void work(uint worker_id) { 1918 double start = os::elapsedTime(); 1919 FreeRegionList local_cleanup_list("Local Cleanup List"); 1920 HRRSCleanupTask hrrs_cleanup_task; 
1921 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1922 &hrrs_cleanup_task); 1923 if (G1CollectedHeap::use_parallel_gc_threads()) { 1924 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1925 _g1h->workers()->active_workers(), 1926 HeapRegion::NoteEndClaimValue); 1927 } else { 1928 _g1h->heap_region_iterate(&g1_note_end); 1929 } 1930 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1931 1932 // Now update the lists 1933 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1934 { 1935 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1936 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1937 _max_live_bytes += g1_note_end.max_live_bytes(); 1938 _freed_bytes += g1_note_end.freed_bytes(); 1939 1940 // If we iterate over the global cleanup list at the end of 1941 // cleanup to do this printing we will not guarantee to only 1942 // generate output for the newly-reclaimed regions (the list 1943 // might not be empty at the beginning of cleanup; we might 1944 // still be working on its previous contents). So we do the 1945 // printing here, before we append the new regions to the global 1946 // cleanup list. 
1947 1948 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1949 if (hr_printer->is_active()) { 1950 FreeRegionListIterator iter(&local_cleanup_list); 1951 while (iter.more_available()) { 1952 HeapRegion* hr = iter.get_next(); 1953 hr_printer->cleanup(hr); 1954 } 1955 } 1956 1957 _cleanup_list->add_ordered(&local_cleanup_list); 1958 assert(local_cleanup_list.is_empty(), "post-condition"); 1959 1960 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1961 } 1962 } 1963 size_t max_live_bytes() { return _max_live_bytes; } 1964 size_t freed_bytes() { return _freed_bytes; } 1965 }; 1966 1967 class G1ParScrubRemSetTask: public AbstractGangTask { 1968 protected: 1969 G1RemSet* _g1rs; 1970 BitMap* _region_bm; 1971 BitMap* _card_bm; 1972 public: 1973 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1974 BitMap* region_bm, BitMap* card_bm) : 1975 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1976 _region_bm(region_bm), _card_bm(card_bm) { } 1977 1978 void work(uint worker_id) { 1979 if (G1CollectedHeap::use_parallel_gc_threads()) { 1980 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1981 HeapRegion::ScrubRemSetClaimValue); 1982 } else { 1983 _g1rs->scrub(_region_bm, _card_bm); 1984 } 1985 } 1986 1987 }; 1988 1989 void ConcurrentMark::cleanup() { 1990 // world is stopped at this checkpoint 1991 assert(SafepointSynchronize::is_at_safepoint(), 1992 "world should be stopped"); 1993 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1994 1995 // If a full collection has happened, we shouldn't do this. 
1996 if (has_aborted()) { 1997 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1998 return; 1999 } 2000 2001 g1h->verify_region_sets_optional(); 2002 2003 if (VerifyDuringGC) { 2004 HandleMark hm; // handle scope 2005 Universe::heap()->prepare_for_verify(); 2006 Universe::verify(VerifyOption_G1UsePrevMarking, 2007 " VerifyDuringGC:(before)"); 2008 } 2009 g1h->check_bitmaps("Cleanup Start"); 2010 2011 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2012 g1p->record_concurrent_mark_cleanup_start(); 2013 2014 double start = os::elapsedTime(); 2015 2016 HeapRegionRemSet::reset_for_cleanup_tasks(); 2017 2018 uint n_workers; 2019 2020 // Do counting once more with the world stopped for good measure. 2021 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2022 2023 if (G1CollectedHeap::use_parallel_gc_threads()) { 2024 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2025 "sanity check"); 2026 2027 g1h->set_par_threads(); 2028 n_workers = g1h->n_par_threads(); 2029 assert(g1h->n_par_threads() == n_workers, 2030 "Should not have been reset"); 2031 g1h->workers()->run_task(&g1_par_count_task); 2032 // Done with the parallel phase so reset to 0. 2033 g1h->set_par_threads(0); 2034 2035 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2036 "sanity check"); 2037 } else { 2038 n_workers = 1; 2039 g1_par_count_task.work(0); 2040 } 2041 2042 if (VerifyDuringGC) { 2043 // Verify that the counting data accumulated during marking matches 2044 // that calculated by walking the marking bitmap. 
2045 2046 // Bitmaps to hold expected values 2047 BitMap expected_region_bm(_region_bm.size(), true); 2048 BitMap expected_card_bm(_card_bm.size(), true); 2049 2050 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2051 &_region_bm, 2052 &_card_bm, 2053 &expected_region_bm, 2054 &expected_card_bm); 2055 2056 if (G1CollectedHeap::use_parallel_gc_threads()) { 2057 g1h->set_par_threads((int)n_workers); 2058 g1h->workers()->run_task(&g1_par_verify_task); 2059 // Done with the parallel phase so reset to 0. 2060 g1h->set_par_threads(0); 2061 2062 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2063 "sanity check"); 2064 } else { 2065 g1_par_verify_task.work(0); 2066 } 2067 2068 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2069 } 2070 2071 size_t start_used_bytes = g1h->used(); 2072 g1h->set_marking_complete(); 2073 2074 double count_end = os::elapsedTime(); 2075 double this_final_counting_time = (count_end - start); 2076 _total_counting_time += this_final_counting_time; 2077 2078 if (G1PrintRegionLivenessInfo) { 2079 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2080 _g1h->heap_region_iterate(&cl); 2081 } 2082 2083 // Install newly created mark bitMap as "prev". 2084 swapMarkBitMaps(); 2085 2086 g1h->reset_gc_time_stamp(); 2087 2088 // Note end of marking in all heap regions. 2089 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2090 if (G1CollectedHeap::use_parallel_gc_threads()) { 2091 g1h->set_par_threads((int)n_workers); 2092 g1h->workers()->run_task(&g1_par_note_end_task); 2093 g1h->set_par_threads(0); 2094 2095 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2096 "sanity check"); 2097 } else { 2098 g1_par_note_end_task.work(0); 2099 } 2100 g1h->check_gc_time_stamps(); 2101 2102 if (!cleanup_list_is_empty()) { 2103 // The cleanup list is not empty, so we'll have to process it 2104 // concurrently. 
Notify anyone else that might be wanting free 2105 // regions that there will be more free regions coming soon. 2106 g1h->set_free_regions_coming(); 2107 } 2108 2109 // call below, since it affects the metric by which we sort the heap 2110 // regions. 2111 if (G1ScrubRemSets) { 2112 double rs_scrub_start = os::elapsedTime(); 2113 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2114 if (G1CollectedHeap::use_parallel_gc_threads()) { 2115 g1h->set_par_threads((int)n_workers); 2116 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2117 g1h->set_par_threads(0); 2118 2119 assert(g1h->check_heap_region_claim_values( 2120 HeapRegion::ScrubRemSetClaimValue), 2121 "sanity check"); 2122 } else { 2123 g1_par_scrub_rs_task.work(0); 2124 } 2125 2126 double rs_scrub_end = os::elapsedTime(); 2127 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2128 _total_rs_scrub_time += this_rs_scrub_time; 2129 } 2130 2131 // this will also free any regions totally full of garbage objects, 2132 // and sort the regions. 2133 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2134 2135 // Statistics. 2136 double end = os::elapsedTime(); 2137 _cleanup_times.add((end - start) * 1000.0); 2138 2139 if (G1Log::fine()) { 2140 g1h->print_size_transition(gclog_or_tty, 2141 start_used_bytes, 2142 g1h->used(), 2143 g1h->capacity()); 2144 } 2145 2146 // Clean up will have freed any regions completely full of garbage. 2147 // Update the soft reference policy with the new heap occupancy. 
2148 Universe::update_heap_info_at_gc(); 2149 2150 if (VerifyDuringGC) { 2151 HandleMark hm; // handle scope 2152 Universe::heap()->prepare_for_verify(); 2153 Universe::verify(VerifyOption_G1UsePrevMarking, 2154 " VerifyDuringGC:(after)"); 2155 } 2156 2157 g1h->check_bitmaps("Cleanup End"); 2158 2159 g1h->verify_region_sets_optional(); 2160 2161 // We need to make this be a "collection" so any collection pause that 2162 // races with it goes around and waits for completeCleanup to finish. 2163 g1h->increment_total_collections(); 2164 2165 // Clean out dead classes and update Metaspace sizes. 2166 ClassLoaderDataGraph::purge(); 2167 MetaspaceGC::compute_new_size(); 2168 2169 // We reclaimed old regions so we should calculate the sizes to make 2170 // sure we update the old gen/space data. 2171 g1h->g1mm()->update_sizes(); 2172 2173 g1h->trace_heap_after_concurrent_cycle(); 2174 } 2175 2176 void ConcurrentMark::completeCleanup() { 2177 if (has_aborted()) return; 2178 2179 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2180 2181 _cleanup_list.verify_optional(); 2182 FreeRegionList tmp_free_list("Tmp Free List"); 2183 2184 if (G1ConcRegionFreeingVerbose) { 2185 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2186 "cleanup list has %u entries", 2187 _cleanup_list.length()); 2188 } 2189 2190 // Noone else should be accessing the _cleanup_list at this point, 2191 // so it's not necessary to take any locks 2192 while (!_cleanup_list.is_empty()) { 2193 HeapRegion* hr = _cleanup_list.remove_head(); 2194 assert(hr != NULL, "Got NULL from a non-empty list"); 2195 hr->par_clear(); 2196 tmp_free_list.add_ordered(hr); 2197 2198 // Instead of adding one region at a time to the secondary_free_list, 2199 // we accumulate them in the local list and move them a few at a 2200 // time. This also cuts down on the number of notify_all() calls 2201 // we do during this process. 
We'll also append the local list when 2202 // _cleanup_list is empty (which means we just removed the last 2203 // region from the _cleanup_list). 2204 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2205 _cleanup_list.is_empty()) { 2206 if (G1ConcRegionFreeingVerbose) { 2207 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2208 "appending %u entries to the secondary_free_list, " 2209 "cleanup list still has %u entries", 2210 tmp_free_list.length(), 2211 _cleanup_list.length()); 2212 } 2213 2214 { 2215 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2216 g1h->secondary_free_list_add(&tmp_free_list); 2217 SecondaryFreeList_lock->notify_all(); 2218 } 2219 2220 if (G1StressConcRegionFreeing) { 2221 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2222 os::sleep(Thread::current(), (jlong) 1, false); 2223 } 2224 } 2225 } 2226 } 2227 assert(tmp_free_list.is_empty(), "post-condition"); 2228 } 2229 2230 // Supporting Object and Oop closures for reference discovery 2231 // and processing in during marking 2232 2233 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2234 HeapWord* addr = (HeapWord*)obj; 2235 return addr != NULL && 2236 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2237 } 2238 2239 // 'Keep Alive' oop closure used by both serial parallel reference processing. 2240 // Uses the CMTask associated with a worker thread (for serial reference 2241 // processing the CMTask for worker 0 is used) to preserve (mark) and 2242 // trace referent objects. 2243 // 2244 // Using the CMTask and embedded local queues avoids having the worker 2245 // threads operating on the global mark stack. This reduces the risk 2246 // of overflowing the stack - which we would rather avoid at this late 2247 // state. Also using the tasks' local queues removes the potential 2248 // of the workers interfering with each other that could occur if 2249 // operating on the global stack. 
2250 2251 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2252 ConcurrentMark* _cm; 2253 CMTask* _task; 2254 int _ref_counter_limit; 2255 int _ref_counter; 2256 bool _is_serial; 2257 public: 2258 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2259 _cm(cm), _task(task), _is_serial(is_serial), 2260 _ref_counter_limit(G1RefProcDrainInterval) { 2261 assert(_ref_counter_limit > 0, "sanity"); 2262 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2263 _ref_counter = _ref_counter_limit; 2264 } 2265 2266 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2267 virtual void do_oop( oop* p) { do_oop_work(p); } 2268 2269 template <class T> void do_oop_work(T* p) { 2270 if (!_cm->has_overflown()) { 2271 oop obj = oopDesc::load_decode_heap_oop(p); 2272 if (_cm->verbose_high()) { 2273 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2274 "*"PTR_FORMAT" = "PTR_FORMAT, 2275 _task->worker_id(), p2i(p), p2i((void*) obj)); 2276 } 2277 2278 _task->deal_with_reference(obj); 2279 _ref_counter--; 2280 2281 if (_ref_counter == 0) { 2282 // We have dealt with _ref_counter_limit references, pushing them 2283 // and objects reachable from them on to the local stack (and 2284 // possibly the global stack). Call CMTask::do_marking_step() to 2285 // process these entries. 2286 // 2287 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2288 // there's nothing more to do (i.e. we're done with the entries that 2289 // were pushed as a result of the CMTask::deal_with_reference() calls 2290 // above) or we overflow. 2291 // 2292 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2293 // flag while there may still be some work to do. (See the comment at 2294 // the beginning of CMTask::do_marking_step() for those conditions - 2295 // one of which is reaching the specified time target.) 
It is only 2296 // when CMTask::do_marking_step() returns without setting the 2297 // has_aborted() flag that the marking step has completed. 2298 do { 2299 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2300 _task->do_marking_step(mark_step_duration_ms, 2301 false /* do_termination */, 2302 _is_serial); 2303 } while (_task->has_aborted() && !_cm->has_overflown()); 2304 _ref_counter = _ref_counter_limit; 2305 } 2306 } else { 2307 if (_cm->verbose_high()) { 2308 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2309 } 2310 } 2311 } 2312 }; 2313 2314 // 'Drain' oop closure used by both serial and parallel reference processing. 2315 // Uses the CMTask associated with a given worker thread (for serial 2316 // reference processing the CMtask for worker 0 is used). Calls the 2317 // do_marking_step routine, with an unbelievably large timeout value, 2318 // to drain the marking data structures of the remaining entries 2319 // added by the 'keep alive' oop closure above. 2320 2321 class G1CMDrainMarkingStackClosure: public VoidClosure { 2322 ConcurrentMark* _cm; 2323 CMTask* _task; 2324 bool _is_serial; 2325 public: 2326 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2327 _cm(cm), _task(task), _is_serial(is_serial) { 2328 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2329 } 2330 2331 void do_void() { 2332 do { 2333 if (_cm->verbose_high()) { 2334 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s", 2335 _task->worker_id(), BOOL_TO_STR(_is_serial)); 2336 } 2337 2338 // We call CMTask::do_marking_step() to completely drain the local 2339 // and global marking stacks of entries pushed by the 'keep alive' 2340 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2341 // 2342 // CMTask::do_marking_step() is called in a loop, which we'll exit 2343 // if there's nothing more to do (i.e. 
we've completely drained the 2344 // entries that were pushed as a a result of applying the 'keep alive' 2345 // closure to the entries on the discovered ref lists) or we overflow 2346 // the global marking stack. 2347 // 2348 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2349 // flag while there may still be some work to do. (See the comment at 2350 // the beginning of CMTask::do_marking_step() for those conditions - 2351 // one of which is reaching the specified time target.) It is only 2352 // when CMTask::do_marking_step() returns without setting the 2353 // has_aborted() flag that the marking step has completed. 2354 2355 _task->do_marking_step(1000000000.0 /* something very large */, 2356 true /* do_termination */, 2357 _is_serial); 2358 } while (_task->has_aborted() && !_cm->has_overflown()); 2359 } 2360 }; 2361 2362 // Implementation of AbstractRefProcTaskExecutor for parallel 2363 // reference processing at the end of G1 concurrent marking 2364 2365 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2366 private: 2367 G1CollectedHeap* _g1h; 2368 ConcurrentMark* _cm; 2369 WorkGang* _workers; 2370 int _active_workers; 2371 2372 public: 2373 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2374 ConcurrentMark* cm, 2375 WorkGang* workers, 2376 int n_workers) : 2377 _g1h(g1h), _cm(cm), 2378 _workers(workers), _active_workers(n_workers) { } 2379 2380 // Executes the given task using concurrent marking worker threads. 
2381 virtual void execute(ProcessTask& task); 2382 virtual void execute(EnqueueTask& task); 2383 }; 2384 2385 class G1CMRefProcTaskProxy: public AbstractGangTask { 2386 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2387 ProcessTask& _proc_task; 2388 G1CollectedHeap* _g1h; 2389 ConcurrentMark* _cm; 2390 2391 public: 2392 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2393 G1CollectedHeap* g1h, 2394 ConcurrentMark* cm) : 2395 AbstractGangTask("Process reference objects in parallel"), 2396 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2397 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2398 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2399 } 2400 2401 virtual void work(uint worker_id) { 2402 CMTask* task = _cm->task(worker_id); 2403 G1CMIsAliveClosure g1_is_alive(_g1h); 2404 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2405 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2406 2407 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2408 } 2409 }; 2410 2411 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2412 assert(_workers != NULL, "Need parallel worker threads."); 2413 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2414 2415 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2416 2417 // We need to reset the concurrency level before each 2418 // proxy task execution, so that the termination protocol 2419 // and overflow handling in CMTask::do_marking_step() knows 2420 // how many workers to wait for. 
2421 _cm->set_concurrency(_active_workers); 2422 _g1h->set_par_threads(_active_workers); 2423 _workers->run_task(&proc_task_proxy); 2424 _g1h->set_par_threads(0); 2425 } 2426 2427 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2428 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2429 EnqueueTask& _enq_task; 2430 2431 public: 2432 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2433 AbstractGangTask("Enqueue reference objects in parallel"), 2434 _enq_task(enq_task) { } 2435 2436 virtual void work(uint worker_id) { 2437 _enq_task.work(worker_id); 2438 } 2439 }; 2440 2441 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2442 assert(_workers != NULL, "Need parallel worker threads."); 2443 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2444 2445 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2446 2447 // Not strictly necessary but... 2448 // 2449 // We need to reset the concurrency level before each 2450 // proxy task execution, so that the termination protocol 2451 // and overflow handling in CMTask::do_marking_step() knows 2452 // how many workers to wait for. 2453 _cm->set_concurrency(_active_workers); 2454 _g1h->set_par_threads(_active_workers); 2455 _workers->run_task(&enq_task_proxy); 2456 _g1h->set_par_threads(0); 2457 } 2458 2459 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2460 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2461 } 2462 2463 // Helper class to get rid of some boilerplate code. 
2464 class G1RemarkGCTraceTime : public GCTraceTime { 2465 static bool doit_and_prepend(bool doit) { 2466 if (doit) { 2467 gclog_or_tty->put(' '); 2468 } 2469 return doit; 2470 } 2471 2472 public: 2473 G1RemarkGCTraceTime(const char* title, bool doit) 2474 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(), 2475 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) { 2476 } 2477 }; 2478 2479 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2480 if (has_overflown()) { 2481 // Skip processing the discovered references if we have 2482 // overflown the global marking stack. Reference objects 2483 // only get discovered once so it is OK to not 2484 // de-populate the discovered reference lists. We could have, 2485 // but the only benefit would be that, when marking restarts, 2486 // less reference objects are discovered. 2487 return; 2488 } 2489 2490 ResourceMark rm; 2491 HandleMark hm; 2492 2493 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2494 2495 // Is alive closure. 2496 G1CMIsAliveClosure g1_is_alive(g1h); 2497 2498 // Inner scope to exclude the cleaning of the string and symbol 2499 // tables from the displayed time. 2500 { 2501 if (G1Log::finer()) { 2502 gclog_or_tty->put(' '); 2503 } 2504 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id()); 2505 2506 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2507 2508 // See the comment in G1CollectedHeap::ref_processing_init() 2509 // about how reference processing currently works in G1. 2510 2511 // Set the soft reference policy 2512 rp->setup_policy(clear_all_soft_refs); 2513 assert(_markStack.isEmpty(), "mark stack should be empty"); 2514 2515 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2516 // in serial reference processing. Note these closures are also 2517 // used for serially processing (by the the current thread) the 2518 // JNI references during parallel reference processing. 
2519 // 2520 // These closures do not need to synchronize with the worker 2521 // threads involved in parallel reference processing as these 2522 // instances are executed serially by the current thread (e.g. 2523 // reference processing is not multi-threaded and is thus 2524 // performed by the current thread instead of a gang worker). 2525 // 2526 // The gang tasks involved in parallel reference processing create 2527 // their own instances of these closures, which do their own 2528 // synchronization among themselves. 2529 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2530 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2531 2532 // We need at least one active thread. If reference processing 2533 // is not multi-threaded we use the current (VMThread) thread, 2534 // otherwise we use the work gang from the G1CollectedHeap and 2535 // we utilize all the worker threads we can. 2536 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL; 2537 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2538 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2539 2540 // Parallel processing task executor. 2541 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2542 g1h->workers(), active_workers); 2543 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2544 2545 // Set the concurrency level. The phase was already set prior to 2546 // executing the remark task. 2547 set_concurrency(active_workers); 2548 2549 // Set the degree of MT processing here. If the discovery was done MT, 2550 // the number of threads involved during discovery could differ from 2551 // the number of active workers. This is OK as long as the discovered 2552 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2553 rp->set_active_mt_degree(active_workers); 2554 2555 // Process the weak references. 
2556 const ReferenceProcessorStats& stats = 2557 rp->process_discovered_references(&g1_is_alive, 2558 &g1_keep_alive, 2559 &g1_drain_mark_stack, 2560 executor, 2561 g1h->gc_timer_cm(), 2562 concurrent_gc_id()); 2563 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2564 2565 // The do_oop work routines of the keep_alive and drain_marking_stack 2566 // oop closures will set the has_overflown flag if we overflow the 2567 // global marking stack. 2568 2569 assert(_markStack.overflow() || _markStack.isEmpty(), 2570 "mark stack should be empty (unless it overflowed)"); 2571 2572 if (_markStack.overflow()) { 2573 // This should have been done already when we tried to push an 2574 // entry on to the global mark stack. But let's do it again. 2575 set_has_overflown(); 2576 } 2577 2578 assert(rp->num_q() == active_workers, "why not"); 2579 2580 rp->enqueue_discovered_references(executor); 2581 2582 rp->verify_no_references_recorded(); 2583 assert(!rp->discovery_enabled(), "Post condition"); 2584 } 2585 2586 if (has_overflown()) { 2587 // We can not trust g1_is_alive if the marking stack overflowed 2588 return; 2589 } 2590 2591 assert(_markStack.isEmpty(), "Marking should have completed"); 2592 2593 // Unload Klasses, String, Symbols, Code Cache, etc. 
2594 2595 G1RemarkGCTraceTime trace("Unloading", G1Log::finer()); 2596 2597 bool purged_classes; 2598 2599 { 2600 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2601 purged_classes = SystemDictionary::do_unloading(&g1_is_alive); 2602 } 2603 2604 { 2605 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest()); 2606 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2607 } 2608 2609 if (G1StringDedup::is_enabled()) { 2610 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2611 G1StringDedup::unlink(&g1_is_alive); 2612 } 2613 } 2614 2615 void ConcurrentMark::swapMarkBitMaps() { 2616 CMBitMapRO* temp = _prevMarkBitMap; 2617 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2618 _nextMarkBitMap = (CMBitMap*) temp; 2619 } 2620 2621 class CMObjectClosure; 2622 2623 // Closure for iterating over objects, currently only used for 2624 // processing SATB buffers. 2625 class CMObjectClosure : public ObjectClosure { 2626 private: 2627 CMTask* _task; 2628 2629 public: 2630 void do_object(oop obj) { 2631 _task->deal_with_reference(obj); 2632 } 2633 2634 CMObjectClosure(CMTask* task) : _task(task) { } 2635 }; 2636 2637 class G1RemarkThreadsClosure : public ThreadClosure { 2638 CMObjectClosure _cm_obj; 2639 G1CMOopClosure _cm_cl; 2640 MarkingCodeBlobClosure _code_cl; 2641 int _thread_parity; 2642 bool _is_par; 2643 2644 public: 2645 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) : 2646 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 2647 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {} 2648 2649 void do_thread(Thread* thread) { 2650 if (thread->is_Java_thread()) { 2651 if (thread->claim_oops_do(_is_par, _thread_parity)) { 2652 JavaThread* jt = (JavaThread*)thread; 2653 2654 // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking 2655 // however 
the liveness of oops reachable from nmethods have very complex lifecycles: 2656 // * Alive if on the stack of an executing method 2657 // * Weakly reachable otherwise 2658 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be 2659 // live by the SATB invariant but other oops recorded in nmethods may behave differently. 2660 jt->nmethods_do(&_code_cl); 2661 2662 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj); 2663 } 2664 } else if (thread->is_VM_thread()) { 2665 if (thread->claim_oops_do(_is_par, _thread_parity)) { 2666 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj); 2667 } 2668 } 2669 } 2670 }; 2671 2672 class CMRemarkTask: public AbstractGangTask { 2673 private: 2674 ConcurrentMark* _cm; 2675 bool _is_serial; 2676 public: 2677 void work(uint worker_id) { 2678 // Since all available tasks are actually started, we should 2679 // only proceed if we're supposed to be active. 2680 if (worker_id < _cm->active_tasks()) { 2681 CMTask* task = _cm->task(worker_id); 2682 task->record_start_time(); 2683 { 2684 ResourceMark rm; 2685 HandleMark hm; 2686 2687 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial); 2688 Threads::threads_do(&threads_f); 2689 } 2690 2691 do { 2692 task->do_marking_step(1000000000.0 /* something very large */, 2693 true /* do_termination */, 2694 _is_serial); 2695 } while (task->has_aborted() && !_cm->has_overflown()); 2696 // If we overflow, then we do not want to restart. We instead 2697 // want to abort remark and do concurrent marking again. 
task->record_end_time();
    }
  }

  // Besides storing its arguments, the constructor resets the shared
  // terminator of 'cm' for 'active_workers' workers.
  CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
    AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

// Performs the remark marking work: runs a CMRemarkTask either on the
// work gang (when parallel GC threads are in use) or directly on the
// calling thread, and afterwards checks that no completed SATB buffers
// are left unless the marking has overflown.
void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      // Fails in debug builds; in builds without asserts we fall back to
      // ParallelGCThreads workers.
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_concurrency_and_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    uint active_workers = 1;
    set_concurrency_and_phase(active_workers, false /* concurrent */);

    // Note - if there's no work gang then the VMThread will be
    // the thread to execute the remark - serially. We have
    // to pass true for the is_serial parameter so that
    // CMTask::do_marking_step() doesn't enter the sync
    // barriers in the event of an overflow. Doing so will
    // cause an assert that the current thread is not a
    // concurrent GC thread.
    CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            err_msg("Invariant: has_overflown = %s, num buffers = %d",
                    BOOL_TO_STR(has_overflown()),
                    satb_mq_set.completed_buffers_num()));

  print_stats();
}

#ifndef PRODUCT

// Prints one line per reference field: the field address, the referenced
// object and a marker describing the referent:
//   ""      the reference is NULL
//   " O"    the referent is outside the G1 reserved space
//   " >"    the referent was allocated since marking (over TAMS), followed
//           by " AND MARKED" if it is also marked
//   " M"    the referent is marked
//   " NOT"  none of the above
class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;
  bool _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption vo,
                           bool all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop( oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
      bool marked = _g1h->is_marked(obj, _vo);

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p2i(p), p2i((void*) obj), str, str2);
  }
};

// Prints an object, followed by its reference fields (through
// PrintReachableOopClosure), if it is over TAMS, marked, or if 'all'
// was requested.
class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;
  bool _all;
  HeapRegion* _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption vo,
                              bool all,
                              HeapRegion* hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
    bool marked = _g1h->is_marked(o, _vo);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
      PrintReachableOopClosure oopCl(_out, _vo, _all);
      o->oop_iterate_no_header(&oopCl);
    }
  }
};

// Prints a header line for each region (bottom, end, top and TAMS) and
// then the objects in [bottom, top) through PrintReachableObjectClosure.
class PrintReachableRegionClosure : public HeapRegionClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;
  bool _all;

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
    _out->cr();

    HeapWord* from = b;
    HeapWord* to = t;

    // Nothing to iterate over if top is still at bottom.
    if (to > from) {
      _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
      _out->cr();
      PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
      hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
      _out->cr();
    }

    // Always false, i.e. the same value for every region.
    return false;
  }

  PrintReachableRegionClosure(outputStream* out,
                              VerifyOption vo,
                              bool all) :
    _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
};

// Debugging aid (non-product builds only): dumps the heap, region by
// region, to the file "<G1PrintReachableBaseFile>.<str>".
//   str - suffix appended to the base file name
//   vo  - which marking information (TAMS / bitmap) to use
//   all - print every object instead of only marked / over-TAMS ones
void ConcurrentMark::print_reachable(const char* str,
                                     VerifyOption vo,
                                     bool all) {
  gclog_or_tty->cr();
gclog_or_tty->print_cr("== Doing heap dump... "); 2886 2887 if (G1PrintReachableBaseFile == NULL) { 2888 gclog_or_tty->print_cr(" #### error: no base file defined"); 2889 return; 2890 } 2891 2892 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2893 (JVM_MAXPATHLEN - 1)) { 2894 gclog_or_tty->print_cr(" #### error: file name too long"); 2895 return; 2896 } 2897 2898 char file_name[JVM_MAXPATHLEN]; 2899 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2900 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2901 2902 fileStream fout(file_name); 2903 if (!fout.is_open()) { 2904 gclog_or_tty->print_cr(" #### error: could not open file"); 2905 return; 2906 } 2907 2908 outputStream* out = &fout; 2909 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2910 out->cr(); 2911 2912 out->print_cr("--- ITERATING OVER REGIONS"); 2913 out->cr(); 2914 PrintReachableRegionClosure rcl(out, vo, all); 2915 _g1h->heap_region_iterate(&rcl); 2916 out->cr(); 2917 2918 gclog_or_tty->print_cr(" done"); 2919 gclog_or_tty->flush(); 2920 } 2921 2922 #endif // PRODUCT 2923 2924 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2925 // Note we are overriding the read-only view of the prev map here, via 2926 // the cast. 2927 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2928 } 2929 2930 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2931 _nextMarkBitMap->clearRange(mr); 2932 } 2933 2934 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2935 clearRangePrevBitmap(mr); 2936 clearRangeNextBitmap(mr); 2937 } 2938 2939 HeapRegion* 2940 ConcurrentMark::claim_region(uint worker_id) { 2941 // "checkpoint" the finger 2942 HeapWord* finger = _finger; 2943 2944 // _heap_end will not change underneath our feet; it only changes at 2945 // yield points. 2946 while (finger < _heap_end) { 2947 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2948 2949 // Note on how this code handles humongous regions. 
In the 2950 // normal case the finger will reach the start of a "starts 2951 // humongous" (SH) region. Its end will either be the end of the 2952 // last "continues humongous" (CH) region in the sequence, or the 2953 // standard end of the SH region (if the SH is the only region in 2954 // the sequence). That way claim_region() will skip over the CH 2955 // regions. However, there is a subtle race between a CM thread 2956 // executing this method and a mutator thread doing a humongous 2957 // object allocation. The two are not mutually exclusive as the CM 2958 // thread does not need to hold the Heap_lock when it gets 2959 // here. So there is a chance that claim_region() will come across 2960 // a free region that's in the progress of becoming a SH or a CH 2961 // region. In the former case, it will either 2962 // a) Miss the update to the region's end, in which case it will 2963 // visit every subsequent CH region, will find their bitmaps 2964 // empty, and do nothing, or 2965 // b) Will observe the update of the region's end (in which case 2966 // it will skip the subsequent CH regions). 2967 // If it comes across a region that suddenly becomes CH, the 2968 // scenario will be similar to b). So, the race between 2969 // claim_region() and a humongous object allocation might force us 2970 // to do a bit of unnecessary work (due to some unnecessary bitmap 2971 // iterations) but it should not introduce and correctness issues. 2972 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2973 HeapWord* bottom = curr_region->bottom(); 2974 HeapWord* end = curr_region->end(); 2975 HeapWord* limit = curr_region->next_top_at_mark_start(); 2976 2977 if (verbose_low()) { 2978 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" " 2979 "["PTR_FORMAT", "PTR_FORMAT"), " 2980 "limit = "PTR_FORMAT, 2981 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit)); 2982 } 2983 2984 // Is the gap between reading the finger and doing the CAS too long? 
2985 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2986 if (res == finger) { 2987 // we succeeded 2988 2989 // notice that _finger == end cannot be guaranteed here since, 2990 // someone else might have moved the finger even further 2991 assert(_finger >= end, "the finger should have moved forward"); 2992 2993 if (verbose_low()) { 2994 gclog_or_tty->print_cr("[%u] we were successful with region = " 2995 PTR_FORMAT, worker_id, p2i(curr_region)); 2996 } 2997 2998 if (limit > bottom) { 2999 if (verbose_low()) { 3000 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, " 3001 "returning it ", worker_id, p2i(curr_region)); 3002 } 3003 return curr_region; 3004 } else { 3005 assert(limit == bottom, 3006 "the region limit should be at bottom"); 3007 if (verbose_low()) { 3008 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, " 3009 "returning NULL", worker_id, p2i(curr_region)); 3010 } 3011 // we return NULL and the caller should try calling 3012 // claim_region() again. 
3013 return NULL; 3014 } 3015 } else { 3016 assert(_finger > finger, "the finger should have moved forward"); 3017 if (verbose_low()) { 3018 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 3019 "global finger = "PTR_FORMAT", " 3020 "our finger = "PTR_FORMAT, 3021 worker_id, p2i(_finger), p2i(finger)); 3022 } 3023 3024 // read it again 3025 finger = _finger; 3026 } 3027 } 3028 3029 return NULL; 3030 } 3031 3032 #ifndef PRODUCT 3033 enum VerifyNoCSetOopsPhase { 3034 VerifyNoCSetOopsStack, 3035 VerifyNoCSetOopsQueues, 3036 VerifyNoCSetOopsSATBCompleted, 3037 VerifyNoCSetOopsSATBThread 3038 }; 3039 3040 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 3041 private: 3042 G1CollectedHeap* _g1h; 3043 VerifyNoCSetOopsPhase _phase; 3044 int _info; 3045 3046 const char* phase_str() { 3047 switch (_phase) { 3048 case VerifyNoCSetOopsStack: return "Stack"; 3049 case VerifyNoCSetOopsQueues: return "Queue"; 3050 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 3051 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 3052 default: ShouldNotReachHere(); 3053 } 3054 return NULL; 3055 } 3056 3057 void do_object_work(oop obj) { 3058 guarantee(!_g1h->obj_in_cs(obj), 3059 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 3060 p2i((void*) obj), phase_str(), _info)); 3061 } 3062 3063 public: 3064 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 3065 3066 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 3067 _phase = phase; 3068 _info = info; 3069 } 3070 3071 virtual void do_oop(oop* p) { 3072 oop obj = oopDesc::load_decode_heap_oop(p); 3073 do_object_work(obj); 3074 } 3075 3076 virtual void do_oop(narrowOop* p) { 3077 // We should not come across narrow oops while scanning marking 3078 // stacks and SATB buffers. 
ShouldNotReachHere();
  }

  virtual void do_object(oop obj) {
    do_object_work(obj);
  }
};

// Debug-only verification that the marking data structures do not refer
// to the collection set. Must be called at a safepoint; does nothing if
// marking is not in progress. Each flag enables one group of checks:
//   verify_stacks           - global mark stack and all task queues
//   verify_enqueued_buffers - completed SATB buffers
//   verify_thread_buffers   - per-thread SATB buffers
//   verify_fingers          - the global finger and the task fingers
void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
                                         bool verify_enqueued_buffers,
                                         bool verify_thread_buffers,
                                         bool verify_fingers) {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->mark_in_progress()) {
    return;
  }

  VerifyNoCSetOopsClosure cl;

  if (verify_stacks) {
    // Verify entries on the global mark stack
    cl.set_phase(VerifyNoCSetOopsStack);
    _markStack.oops_do(&cl);

    // Verify entries on the task queues
    for (uint i = 0; i < _max_worker_id; i += 1) {
      // The queue index is recorded as 'info' for the failure message.
      cl.set_phase(VerifyNoCSetOopsQueues, i);
      CMTaskQueue* queue = _task_queues->queue(i);
      queue->oops_do(&cl);
    }
  }

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

  // Verify entries on the enqueued SATB buffers
  if (verify_enqueued_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
    satb_qs.iterate_completed_buffers_read_only(&cl);
  }

  // Verify entries on the per-thread SATB buffers
  if (verify_thread_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBThread);
    satb_qs.iterate_thread_buffers_read_only(&cl);
  }

  if (verify_fingers) {
    // Verify the global finger
    HeapWord* global_finger = finger();
    if (global_finger != NULL && global_finger < _heap_end) {
      // The global finger always points to a heap region boundary. We
      // use heap_region_containing_raw() to get the containing region
      // given that the global finger could be pointing to a free region
      // which subsequently becomes continues humongous. If that
      // happens, heap_region_containing() will return the bottom of the
      // corresponding starts humongous region and the check below will
      // not hold any more.
HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
      guarantee(global_finger == global_hr->bottom(),
                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
                        p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
    }

    // Verify the task fingers
    assert(parallel_marking_threads() <= _max_worker_id, "sanity");
    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
      CMTask* task = _tasks[i];
      HeapWord* task_finger = task->finger();
      if (task_finger != NULL && task_finger < _heap_end) {
        // See above note on the global finger verification.
        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
        // A task finger may point into the middle of a region, but then
        // that region must not be in the collection set.
        guarantee(task_finger == task_hr->bottom() ||
                  !task_hr->in_collection_set(),
                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
                          p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
      }
    }
  }
}
#endif // PRODUCT

// Aggregate the counting data that was constructed concurrently
// with marking.
//
// For every region that is not "continues humongous" and whose NTAMS is
// above bottom, this closure sums the per-worker marked-bytes entries of
// the region, adds the sum to the region, and ORs the per-worker card
// bitmaps for [bottom, NTAMS) into the global card bitmap.
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

 public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  // Always returns false, whether or not the region had data to aggregate.
  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have their bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could be actually just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrs_index = hr->hrs_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrs_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
    hr->add_to_marked_bytes(marked_bytes);

    // Next heap region
    return false;
  }
};

// Gang task that applies AggregateCountDataHRClosure to the heap regions:
// chunked and claimed with AggregateCountClaimValue when parallel GC
// threads are in use, with a plain region iteration otherwise.
class G1AggregateCountDataTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _cm_card_bm;
  uint _max_worker_id;
  int _active_workers;

public:
  G1AggregateCountDataTask(G1CollectedHeap* g1h,
                           ConcurrentMark* cm,
                           BitMap* cm_card_bm,
                           uint max_worker_id,
                           int n_workers) :
    AbstractGangTask("Count Aggregation"),
    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
    _max_worker_id(max_worker_id),
    _active_workers(n_workers) { }

  void work(uint worker_id) {
    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
                                            _active_workers,
                                            HeapRegion::AggregateCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&cl);
    }
  }
};


// Runs G1AggregateCountDataTask, on the work gang when parallel GC
// threads are in use and on the calling thread otherwise. In the parallel
// case the region claim values are checked before and after the task and
// then reset.
void ConcurrentMark::aggregate_count_data() {
  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                        _g1h->workers()->active_workers() :
                        1);

  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                           _max_worker_id, n_workers);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");
    _g1h->set_par_threads(n_workers);
    _g1h->workers()->run_task(&g1_par_agg_task);
    _g1h->set_par_threads(0);

    assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
           "sanity check");
    _g1h->reset_heap_region_claim_values();
  } else {
    g1_par_agg_task.work(0);
  }
}

// Clear the per-worker arrays used to store the per-region counting data
void ConcurrentMark::clear_all_count_data() {
  // Clear the global card bitmap - it will be filled during
  // liveness count aggregation (during remark) and the
  // final counting task.
  _card_bm.clear();

  // Clear the global region bitmap - it will be filled as part
  // of the final counting task.
_region_bm.clear();

  uint max_regions = _g1h->max_regions();
  assert(_max_worker_id > 0, "uninitialized");

  // Reset the per-worker data: one marked-bytes entry per region and one
  // card bitmap per worker.
  for (uint i = 0; i < _max_worker_id; i += 1) {
    BitMap* task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_array_for(i);

    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
    assert(marked_bytes_array != NULL, "uninitialized");

    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
    task_card_bm->clear();
  }
}

// Prints the statistics of every active task, separated by ruler lines,
// when verbose_stats() is enabled; does nothing otherwise.
void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
  // concurrent bitmap clearing.
  _nextMarkBitMap->clearAll();

  // Note we cannot clear the previous marking bitmap here
  // since VerifyDuringGC verifies the objects marked during
  // a full GC against the previous bitmap.

  // Clear the liveness counting data
  clear_all_count_data();
  // Empty mark stack
  reset_marking_state();
  // Make every task let go of the region it may be holding on to.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
  const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
  if (!gc_id.is_undefined()) {
    // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
    // to detect that it was aborted. Only keep track of the first GC id that we aborted.
_aborted_gc_id = gc_id;
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);

  _g1h->trace_heap_after_concurrent_cycle();
  _g1h->register_concurrent_cycle_end();
}

// Returns the GC id recorded by abort() if marking has been aborted,
// otherwise the id currently held by the concurrent-mark tracer.
const GCId& ConcurrentMark::concurrent_gc_id() {
  if (has_aborted()) {
    return _aborted_gc_id;
  }
  return _g1h->gc_tracer_cm()->gc_id();
}

// Prints one summary line for the number sequence 'ns' (count, total in
// seconds, average in ms) and, if the sequence is not empty, a second
// line with its standard deviation and maximum.
//   prefix - printed in front of each line
//   name   - label of the sequence
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  // ns.sum() is divided by 1000 for the "s" column while ns.avg() is
  // printed as ms, i.e. the sequence is presumably kept in ms.
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

// Prints the accumulated concurrent marking timing summary to the GC log.
void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info(" ", "init marks", _init_times);
  print_ms_time_info(" ", "remarks", _remark_times);
  {
    print_ms_time_info(" ", "final marks", _remark_mark_times);
    print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info(" ", "cleanups", _cleanup_times);
  // The averages below are per cleanup; 0.0 is printed if there has been
  // no cleanup so far (avoids a division by zero).
  gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                                                     (double)_cleanup_times.num()
                                                   : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                                                       (double)_cleanup_times.num()
                                                     : 0.0));
  }
  gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
                         "(%8.2f s marking).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum());
}

// Prints the parallel marking worker threads on 'st', if parallel
// marking threads are in use.
void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  if (use_parallel_marking_threads()) {
    _parallel_workers->print_worker_threads_on(st);
  }
}

// Prints the addresses of the two marking bitmaps on 'st' and then lets
// each bitmap print its own details.
void ConcurrentMark::print_on_error(outputStream* st) const {
  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
      p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
  _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
  _nextMarkBitMap->print_on_error(st, " Next Bits: ");
}

// We take a break if someone is trying to stop the world.
3460 bool ConcurrentMark::do_yield_check(uint worker_id) { 3461 if (SuspendibleThreadSet::should_yield()) { 3462 if (worker_id == 0) { 3463 _g1h->g1_policy()->record_concurrent_pause(); 3464 } 3465 SuspendibleThreadSet::yield(); 3466 return true; 3467 } else { 3468 return false; 3469 } 3470 } 3471 3472 bool ConcurrentMark::containing_card_is_marked(void* p) { 3473 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3474 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3475 } 3476 3477 bool ConcurrentMark::containing_cards_are_marked(void* start, 3478 void* last) { 3479 return containing_card_is_marked(start) && 3480 containing_card_is_marked(last); 3481 } 3482 3483 #ifndef PRODUCT 3484 // for debugging purposes 3485 void ConcurrentMark::print_finger() { 3486 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3487 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3488 for (uint i = 0; i < _max_worker_id; ++i) { 3489 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3490 } 3491 gclog_or_tty->cr(); 3492 } 3493 #endif 3494 3495 void CMTask::scan_object(oop obj) { 3496 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3497 3498 if (_cm->verbose_high()) { 3499 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3500 _worker_id, p2i((void*) obj)); 3501 } 3502 3503 size_t obj_size = obj->size(); 3504 _words_scanned += obj_size; 3505 3506 obj->oop_iterate(_cm_oop_closure); 3507 statsOnly( ++_objs_scanned ); 3508 check_limits(); 3509 } 3510 3511 // Closure for iteration over bitmaps 3512 class CMBitMapClosure : public BitMapClosure { 3513 private: 3514 // the bitmap that is being iterated over 3515 CMBitMap* _nextMarkBitMap; 3516 ConcurrentMark* _cm; 3517 CMTask* _task; 3518 3519 public: 3520 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3521 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3522 3523 bool do_bit(size_t 
offset) { 3524 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3525 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3526 assert( addr < _cm->finger(), "invariant"); 3527 3528 statsOnly( _task->increase_objs_found_on_bitmap() ); 3529 assert(addr >= _task->finger(), "invariant"); 3530 3531 // We move that task's local finger along. 3532 _task->move_finger_to(addr); 3533 3534 _task->scan_object(oop(addr)); 3535 // we only partially drain the local queue and global stack 3536 _task->drain_local_queue(true); 3537 _task->drain_global_stack(true); 3538 3539 // if the has_aborted flag has been raised, we need to bail out of 3540 // the iteration 3541 return !_task->has_aborted(); 3542 } 3543 }; 3544 3545 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3546 ConcurrentMark* cm, 3547 CMTask* task) 3548 : _g1h(g1h), _cm(cm), _task(task) { 3549 assert(_ref_processor == NULL, "should be initialized to NULL"); 3550 3551 if (G1UseConcMarkReferenceProcessing) { 3552 _ref_processor = g1h->ref_processor_cm(); 3553 assert(_ref_processor != NULL, "should not be NULL"); 3554 } 3555 } 3556 3557 void CMTask::setup_for_region(HeapRegion* hr) { 3558 assert(hr != NULL, 3559 "claim_region() should have filtered out NULL regions"); 3560 assert(!hr->continuesHumongous(), 3561 "claim_region() should have filtered out continues humongous regions"); 3562 3563 if (_cm->verbose_low()) { 3564 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT, 3565 _worker_id, p2i(hr)); 3566 } 3567 3568 _curr_region = hr; 3569 _finger = hr->bottom(); 3570 update_region_limit(); 3571 } 3572 3573 void CMTask::update_region_limit() { 3574 HeapRegion* hr = _curr_region; 3575 HeapWord* bottom = hr->bottom(); 3576 HeapWord* limit = hr->next_top_at_mark_start(); 3577 3578 if (limit == bottom) { 3579 if (_cm->verbose_low()) { 3580 gclog_or_tty->print_cr("[%u] found an empty region " 3581 "["PTR_FORMAT", "PTR_FORMAT")", 3582 _worker_id, p2i(bottom), p2i(limit)); 3583 } 3584 // The region was 
collected underneath our feet. 3585 // We set the finger to bottom to ensure that the bitmap 3586 // iteration that will follow this will not do anything. 3587 // (this is not a condition that holds when we set the region up, 3588 // as the region is not supposed to be empty in the first place) 3589 _finger = bottom; 3590 } else if (limit >= _region_limit) { 3591 assert(limit >= _finger, "peace of mind"); 3592 } else { 3593 assert(limit < _region_limit, "only way to get here"); 3594 // This can happen under some pretty unusual circumstances. An 3595 // evacuation pause empties the region underneath our feet (NTAMS 3596 // at bottom). We then do some allocation in the region (NTAMS 3597 // stays at bottom), followed by the region being used as a GC 3598 // alloc region (NTAMS will move to top() and the objects 3599 // originally below it will be grayed). All objects now marked in 3600 // the region are explicitly grayed, if below the global finger, 3601 // and we do not need in fact to scan anything else. So, we simply 3602 // set _finger to be limit to ensure that the bitmap iteration 3603 // doesn't do anything. 3604 _finger = limit; 3605 } 3606 3607 _region_limit = limit; 3608 } 3609 3610 void CMTask::giveup_current_region() { 3611 assert(_curr_region != NULL, "invariant"); 3612 if (_cm->verbose_low()) { 3613 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3614 _worker_id, p2i(_curr_region)); 3615 } 3616 clear_region_fields(); 3617 } 3618 3619 void CMTask::clear_region_fields() { 3620 // Values for these three fields that indicate that we're not 3621 // holding on to a region. 
3622 _curr_region = NULL; 3623 _finger = NULL; 3624 _region_limit = NULL; 3625 } 3626 3627 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3628 if (cm_oop_closure == NULL) { 3629 assert(_cm_oop_closure != NULL, "invariant"); 3630 } else { 3631 assert(_cm_oop_closure == NULL, "invariant"); 3632 } 3633 _cm_oop_closure = cm_oop_closure; 3634 } 3635 3636 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3637 guarantee(nextMarkBitMap != NULL, "invariant"); 3638 3639 if (_cm->verbose_low()) { 3640 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3641 } 3642 3643 _nextMarkBitMap = nextMarkBitMap; 3644 clear_region_fields(); 3645 3646 _calls = 0; 3647 _elapsed_time_ms = 0.0; 3648 _termination_time_ms = 0.0; 3649 _termination_start_time_ms = 0.0; 3650 3651 #if _MARKING_STATS_ 3652 _local_pushes = 0; 3653 _local_pops = 0; 3654 _local_max_size = 0; 3655 _objs_scanned = 0; 3656 _global_pushes = 0; 3657 _global_pops = 0; 3658 _global_max_size = 0; 3659 _global_transfers_to = 0; 3660 _global_transfers_from = 0; 3661 _regions_claimed = 0; 3662 _objs_found_on_bitmap = 0; 3663 _satb_buffers_processed = 0; 3664 _steal_attempts = 0; 3665 _steals = 0; 3666 _aborted = 0; 3667 _aborted_overflow = 0; 3668 _aborted_cm_aborted = 0; 3669 _aborted_yield = 0; 3670 _aborted_timed_out = 0; 3671 _aborted_satb = 0; 3672 _aborted_termination = 0; 3673 #endif // _MARKING_STATS_ 3674 } 3675 3676 bool CMTask::should_exit_termination() { 3677 regular_clock_call(); 3678 // This is called when we are in the termination protocol. We should 3679 // quit if, for some reason, this task wants to abort or the global 3680 // stack is not empty (this means that we can get work from it). 
3681 return !_cm->mark_stack_empty() || has_aborted(); 3682 } 3683 3684 void CMTask::reached_limit() { 3685 assert(_words_scanned >= _words_scanned_limit || 3686 _refs_reached >= _refs_reached_limit , 3687 "shouldn't have been called otherwise"); 3688 regular_clock_call(); 3689 } 3690 3691 void CMTask::regular_clock_call() { 3692 if (has_aborted()) return; 3693 3694 // First, we need to recalculate the words scanned and refs reached 3695 // limits for the next clock call. 3696 recalculate_limits(); 3697 3698 // During the regular clock call we do the following 3699 3700 // (1) If an overflow has been flagged, then we abort. 3701 if (_cm->has_overflown()) { 3702 set_has_aborted(); 3703 return; 3704 } 3705 3706 // If we are not concurrent (i.e. we're doing remark) we don't need 3707 // to check anything else. The other steps are only needed during 3708 // the concurrent marking phase. 3709 if (!concurrent()) return; 3710 3711 // (2) If marking has been aborted for Full GC, then we also abort. 3712 if (_cm->has_aborted()) { 3713 set_has_aborted(); 3714 statsOnly( ++_aborted_cm_aborted ); 3715 return; 3716 } 3717 3718 double curr_time_ms = os::elapsedVTime() * 1000.0; 3719 3720 // (3) If marking stats are enabled, then we update the step history. 3721 #if _MARKING_STATS_ 3722 if (_words_scanned >= _words_scanned_limit) { 3723 ++_clock_due_to_scanning; 3724 } 3725 if (_refs_reached >= _refs_reached_limit) { 3726 ++_clock_due_to_marking; 3727 } 3728 3729 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 3730 _interval_start_time_ms = curr_time_ms; 3731 _all_clock_intervals_ms.add(last_interval_ms); 3732 3733 if (_cm->verbose_medium()) { 3734 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, " 3735 "scanned = %d%s, refs reached = %d%s", 3736 _worker_id, last_interval_ms, 3737 _words_scanned, 3738 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 3739 _refs_reached, 3740 (_refs_reached >= _refs_reached_limit) ? 
" (*)" : ""); 3741 } 3742 #endif // _MARKING_STATS_ 3743 3744 // (4) We check whether we should yield. If we have to, then we abort. 3745 if (SuspendibleThreadSet::should_yield()) { 3746 // We should yield. To do this we abort the task. The caller is 3747 // responsible for yielding. 3748 set_has_aborted(); 3749 statsOnly( ++_aborted_yield ); 3750 return; 3751 } 3752 3753 // (5) We check whether we've reached our time quota. If we have, 3754 // then we abort. 3755 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3756 if (elapsed_time_ms > _time_target_ms) { 3757 set_has_aborted(); 3758 _has_timed_out = true; 3759 statsOnly( ++_aborted_timed_out ); 3760 return; 3761 } 3762 3763 // (6) Finally, we check whether there are enough completed STAB 3764 // buffers available for processing. If there are, we abort. 3765 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3766 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3767 if (_cm->verbose_low()) { 3768 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers", 3769 _worker_id); 3770 } 3771 // we do need to process SATB buffers, we'll abort and restart 3772 // the marking task to do so 3773 set_has_aborted(); 3774 statsOnly( ++_aborted_satb ); 3775 return; 3776 } 3777 } 3778 3779 void CMTask::recalculate_limits() { 3780 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3781 _words_scanned_limit = _real_words_scanned_limit; 3782 3783 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3784 _refs_reached_limit = _real_refs_reached_limit; 3785 } 3786 3787 void CMTask::decrease_limits() { 3788 // This is called when we believe that we're going to do an infrequent 3789 // operation which will increase the per byte scanned cost (i.e. move 3790 // entries to/from the global stack). It basically tries to decrease the 3791 // scanning limit so that the clock is called earlier. 
3792 3793 if (_cm->verbose_medium()) { 3794 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id); 3795 } 3796 3797 _words_scanned_limit = _real_words_scanned_limit - 3798 3 * words_scanned_period / 4; 3799 _refs_reached_limit = _real_refs_reached_limit - 3800 3 * refs_reached_period / 4; 3801 } 3802 3803 void CMTask::move_entries_to_global_stack() { 3804 // local array where we'll store the entries that will be popped 3805 // from the local queue 3806 oop buffer[global_stack_transfer_size]; 3807 3808 int n = 0; 3809 oop obj; 3810 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3811 buffer[n] = obj; 3812 ++n; 3813 } 3814 3815 if (n > 0) { 3816 // we popped at least one entry from the local queue 3817 3818 statsOnly( ++_global_transfers_to; _local_pops += n ); 3819 3820 if (!_cm->mark_stack_push(buffer, n)) { 3821 if (_cm->verbose_low()) { 3822 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow", 3823 _worker_id); 3824 } 3825 set_has_aborted(); 3826 } else { 3827 // the transfer was successful 3828 3829 if (_cm->verbose_medium()) { 3830 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack", 3831 _worker_id, n); 3832 } 3833 statsOnly( int tmp_size = _cm->mark_stack_size(); 3834 if (tmp_size > _global_max_size) { 3835 _global_max_size = tmp_size; 3836 } 3837 _global_pushes += n ); 3838 } 3839 } 3840 3841 // this operation was quite expensive, so decrease the limits 3842 decrease_limits(); 3843 } 3844 3845 void CMTask::get_entries_from_global_stack() { 3846 // local array where we'll store the entries that will be popped 3847 // from the global stack. 
3848 oop buffer[global_stack_transfer_size]; 3849 int n; 3850 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3851 assert(n <= global_stack_transfer_size, 3852 "we should not pop more than the given limit"); 3853 if (n > 0) { 3854 // yes, we did actually pop at least one entry 3855 3856 statsOnly( ++_global_transfers_from; _global_pops += n ); 3857 if (_cm->verbose_medium()) { 3858 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack", 3859 _worker_id, n); 3860 } 3861 for (int i = 0; i < n; ++i) { 3862 bool success = _task_queue->push(buffer[i]); 3863 // We only call this when the local queue is empty or under a 3864 // given target limit. So, we do not expect this push to fail. 3865 assert(success, "invariant"); 3866 } 3867 3868 statsOnly( int tmp_size = _task_queue->size(); 3869 if (tmp_size > _local_max_size) { 3870 _local_max_size = tmp_size; 3871 } 3872 _local_pushes += n ); 3873 } 3874 3875 // this operation was quite expensive, so decrease the limits 3876 decrease_limits(); 3877 } 3878 3879 void CMTask::drain_local_queue(bool partially) { 3880 if (has_aborted()) return; 3881 3882 // Decide what the target size is, depending whether we're going to 3883 // drain it partially (so that other tasks can steal if they run out 3884 // of things to do) or totally (at the very end). 
3885 size_t target_size; 3886 if (partially) { 3887 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3888 } else { 3889 target_size = 0; 3890 } 3891 3892 if (_task_queue->size() > target_size) { 3893 if (_cm->verbose_high()) { 3894 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT, 3895 _worker_id, target_size); 3896 } 3897 3898 oop obj; 3899 bool ret = _task_queue->pop_local(obj); 3900 while (ret) { 3901 statsOnly( ++_local_pops ); 3902 3903 if (_cm->verbose_high()) { 3904 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id, 3905 p2i((void*) obj)); 3906 } 3907 3908 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3909 assert(!_g1h->is_on_master_free_list( 3910 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3911 3912 scan_object(obj); 3913 3914 if (_task_queue->size() <= target_size || has_aborted()) { 3915 ret = false; 3916 } else { 3917 ret = _task_queue->pop_local(obj); 3918 } 3919 } 3920 3921 if (_cm->verbose_high()) { 3922 gclog_or_tty->print_cr("[%u] drained local queue, size = %u", 3923 _worker_id, _task_queue->size()); 3924 } 3925 } 3926 } 3927 3928 void CMTask::drain_global_stack(bool partially) { 3929 if (has_aborted()) return; 3930 3931 // We have a policy to drain the local queue before we attempt to 3932 // drain the global stack. 3933 assert(partially || _task_queue->size() == 0, "invariant"); 3934 3935 // Decide what the target size is, depending whether we're going to 3936 // drain it partially (so that other tasks can steal if they run out 3937 // of things to do) or totally (at the very end). Notice that, 3938 // because we move entries from the global stack in chunks or 3939 // because another task might be doing the same, we might in fact 3940 // drop below the target. But, this is not a problem. 
3941 size_t target_size; 3942 if (partially) { 3943 target_size = _cm->partial_mark_stack_size_target(); 3944 } else { 3945 target_size = 0; 3946 } 3947 3948 if (_cm->mark_stack_size() > target_size) { 3949 if (_cm->verbose_low()) { 3950 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT, 3951 _worker_id, target_size); 3952 } 3953 3954 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3955 get_entries_from_global_stack(); 3956 drain_local_queue(partially); 3957 } 3958 3959 if (_cm->verbose_low()) { 3960 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT, 3961 _worker_id, _cm->mark_stack_size()); 3962 } 3963 } 3964 } 3965 3966 // SATB Queue has several assumptions on whether to call the par or 3967 // non-par versions of the methods. this is why some of the code is 3968 // replicated. We should really get rid of the single-threaded version 3969 // of the code to simplify things. 3970 void CMTask::drain_satb_buffers() { 3971 if (has_aborted()) return; 3972 3973 // We set this so that the regular clock knows that we're in the 3974 // middle of draining buffers and doesn't set the abort flag when it 3975 // notices that SATB buffers are available for draining. It'd be 3976 // very counter productive if it did that. :-) 3977 _draining_satb_buffers = true; 3978 3979 CMObjectClosure oc(this); 3980 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3981 if (G1CollectedHeap::use_parallel_gc_threads()) { 3982 satb_mq_set.set_par_closure(_worker_id, &oc); 3983 } else { 3984 satb_mq_set.set_closure(&oc); 3985 } 3986 3987 // This keeps claiming and applying the closure to completed buffers 3988 // until we run out of buffers or we need to abort. 
3989 if (G1CollectedHeap::use_parallel_gc_threads()) { 3990 while (!has_aborted() && 3991 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3992 if (_cm->verbose_medium()) { 3993 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3994 } 3995 statsOnly( ++_satb_buffers_processed ); 3996 regular_clock_call(); 3997 } 3998 } else { 3999 while (!has_aborted() && 4000 satb_mq_set.apply_closure_to_completed_buffer()) { 4001 if (_cm->verbose_medium()) { 4002 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 4003 } 4004 statsOnly( ++_satb_buffers_processed ); 4005 regular_clock_call(); 4006 } 4007 } 4008 4009 _draining_satb_buffers = false; 4010 4011 assert(has_aborted() || 4012 concurrent() || 4013 satb_mq_set.completed_buffers_num() == 0, "invariant"); 4014 4015 if (G1CollectedHeap::use_parallel_gc_threads()) { 4016 satb_mq_set.set_par_closure(_worker_id, NULL); 4017 } else { 4018 satb_mq_set.set_closure(NULL); 4019 } 4020 4021 // again, this was a potentially expensive operation, decrease the 4022 // limits to get the regular clock call early 4023 decrease_limits(); 4024 } 4025 4026 void CMTask::print_stats() { 4027 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 4028 _worker_id, _calls); 4029 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 4030 _elapsed_time_ms, _termination_time_ms); 4031 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4032 _step_times_ms.num(), _step_times_ms.avg(), 4033 _step_times_ms.sd()); 4034 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4035 _step_times_ms.maximum(), _step_times_ms.sum()); 4036 4037 #if _MARKING_STATS_ 4038 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4039 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 4040 _all_clock_intervals_ms.sd()); 4041 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4042 
_all_clock_intervals_ms.maximum(), 4043 _all_clock_intervals_ms.sum()); 4044 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 4045 _clock_due_to_scanning, _clock_due_to_marking); 4046 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 4047 _objs_scanned, _objs_found_on_bitmap); 4048 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 4049 _local_pushes, _local_pops, _local_max_size); 4050 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 4051 _global_pushes, _global_pops, _global_max_size); 4052 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 4053 _global_transfers_to,_global_transfers_from); 4054 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 4055 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 4056 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 4057 _steal_attempts, _steals); 4058 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 4059 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 4060 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 4061 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 4062 _aborted_timed_out, _aborted_satb, _aborted_termination); 4063 #endif // _MARKING_STATS_ 4064 } 4065 4066 /***************************************************************************** 4067 4068 The do_marking_step(time_target_ms, ...) method is the building 4069 block of the parallel marking framework. It can be called in parallel 4070 with other invocations of do_marking_step() on different tasks 4071 (but only one per task, obviously) and concurrently with the 4072 mutator threads, or during remark, hence it eliminates the need 4073 for two versions of the code. When called during remark, it will 4074 pick up from where the task left off during the concurrent marking 4075 phase. 
Interestingly, tasks are also claimable during evacuation 4076 pauses too, since do_marking_step() ensures that it aborts before 4077 it needs to yield. 4078 4079 The data structures that it uses to do marking work are the 4080 following: 4081 4082 (1) Marking Bitmap. If there are gray objects that appear only 4083 on the bitmap (this happens either when dealing with an overflow 4084 or when the initial marking phase has simply marked the roots 4085 and didn't push them on the stack), then tasks claim heap 4086 regions whose bitmap they then scan to find gray objects. A 4087 global finger indicates where the end of the last claimed region 4088 is. A local finger indicates how far into the region a task has 4089 scanned. The two fingers are used to determine how to gray an 4090 object (i.e. whether simply marking it is OK, as it will be 4091 visited by a task in the future, or whether it needs to be also 4092 pushed on a stack). 4093 4094 (2) Local Queue. The local queue of the task which is accessed 4095 reasonably efficiently by the task. Other tasks can steal from 4096 it when they run out of work. Throughout the marking phase, a 4097 task attempts to keep its local queue short but not totally 4098 empty, so that entries are available for stealing by other 4099 tasks. Only when there is no more work, a task will totally 4100 drain its local queue. 4101 4102 (3) Global Mark Stack. This handles local queue overflow. During 4103 marking only sets of entries are moved between it and the local 4104 queues, as access to it requires a mutex and more fine-grain 4105 interaction with it which might cause contention. If it 4106 overflows, then the marking phase should restart and iterate 4107 over the bitmap to identify gray objects. Throughout the marking 4108 phase, tasks attempt to keep the global mark stack at a small 4109 length but not totally empty, so that entries are available for 4110 popping by other tasks. 
Only when there is no more work, tasks 4111 will totally drain the global mark stack. 4112 4113 (4) SATB Buffer Queue. This is where completed SATB buffers are 4114 made available. Buffers are regularly removed from this queue 4115 and scanned for roots, so that the queue doesn't get too 4116 long. During remark, all completed buffers are processed, as 4117 well as the filled in parts of any uncompleted buffers. 4118 4119 The do_marking_step() method tries to abort when the time target 4120 has been reached. There are a few other cases when the 4121 do_marking_step() method also aborts: 4122 4123 (1) When the marking phase has been aborted (after a Full GC). 4124 4125 (2) When a global overflow (on the global stack) has been 4126 triggered. Before the task aborts, it will actually sync up with 4127 the other tasks to ensure that all the marking data structures 4128 (local queues, stacks, fingers etc.) are re-initialized so that 4129 when do_marking_step() completes, the marking phase can 4130 immediately restart. 4131 4132 (3) When enough completed SATB buffers are available. The 4133 do_marking_step() method only tries to drain SATB buffers right 4134 at the beginning. So, if enough buffers are available, the 4135 marking step aborts and the SATB buffers are processed at 4136 the beginning of the next invocation. 4137 4138 (4) To yield. when we have to yield then we abort and yield 4139 right at the end of do_marking_step(). This saves us from a lot 4140 of hassle as, by yielding we might allow a Full GC. If this 4141 happens then objects will be compacted underneath our feet, the 4142 heap might shrink, etc. We save checking for this by just 4143 aborting and doing the yield right at the end. 4144 4145 From the above it follows that the do_marking_step() method should 4146 be called in a loop (or, otherwise, regularly) until it completes. 
4147 4148 If a marking step completes without its has_aborted() flag being 4149 true, it means it has completed the current marking phase (and 4150 also all other marking tasks have done so and have all synced up). 4151 4152 A method called regular_clock_call() is invoked "regularly" (in 4153 sub ms intervals) throughout marking. It is this clock method that 4154 checks all the abort conditions which were mentioned above and 4155 decides when the task should abort. A work-based scheme is used to 4156 trigger this clock method: when the number of object words the 4157 marking phase has scanned or the number of references the marking 4158 phase has visited reach a given limit. Additional invocations to 4159 the method clock have been planted in a few other strategic places 4160 too. The initial reason for the clock method was to avoid calling 4161 vtime too regularly, as it is quite expensive. So, once it was in 4162 place, it was natural to piggy-back all the other conditions on it 4163 too and not constantly check them throughout the code. 4164 4165 If do_termination is true then do_marking_step will enter its 4166 termination protocol. 4167 4168 The value of is_serial must be true when do_marking_step is being 4169 called serially (i.e. by the VMThread) and do_marking_step should 4170 skip any synchronization in the termination and overflow code. 4171 Examples include the serial remark code and the serial reference 4172 processing closures. 4173 4174 The value of is_serial must be false when do_marking_step is 4175 being called by any of the worker threads in a work gang. 4176 Examples include the concurrent marking code (CMMarkingTask), 4177 the MT remark code, and the MT reference processing closures. 
4178 4179 *****************************************************************************/ 4180 4181 void CMTask::do_marking_step(double time_target_ms, 4182 bool do_termination, 4183 bool is_serial) { 4184 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 4185 assert(concurrent() == _cm->concurrent(), "they should be the same"); 4186 4187 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 4188 assert(_task_queues != NULL, "invariant"); 4189 assert(_task_queue != NULL, "invariant"); 4190 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 4191 4192 assert(!_claimed, 4193 "only one thread should claim this task at any one time"); 4194 4195 // OK, this doesn't safeguard again all possible scenarios, as it is 4196 // possible for two threads to set the _claimed flag at the same 4197 // time. But it is only for debugging purposes anyway and it will 4198 // catch most problems. 4199 _claimed = true; 4200 4201 _start_time_ms = os::elapsedVTime() * 1000.0; 4202 statsOnly( _interval_start_time_ms = _start_time_ms ); 4203 4204 // If do_stealing is true then do_marking_step will attempt to 4205 // steal work from the other CMTasks. It only makes sense to 4206 // enable stealing when the termination protocol is enabled 4207 // and do_marking_step() is not being called serially. 
4208 bool do_stealing = do_termination && !is_serial; 4209 4210 double diff_prediction_ms = 4211 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 4212 _time_target_ms = time_target_ms - diff_prediction_ms; 4213 4214 // set up the variables that are used in the work-based scheme to 4215 // call the regular clock method 4216 _words_scanned = 0; 4217 _refs_reached = 0; 4218 recalculate_limits(); 4219 4220 // clear all flags 4221 clear_has_aborted(); 4222 _has_timed_out = false; 4223 _draining_satb_buffers = false; 4224 4225 ++_calls; 4226 4227 if (_cm->verbose_low()) { 4228 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, " 4229 "target = %1.2lfms >>>>>>>>>>", 4230 _worker_id, _calls, _time_target_ms); 4231 } 4232 4233 // Set up the bitmap and oop closures. Anything that uses them is 4234 // eventually called from this method, so it is OK to allocate these 4235 // statically. 4236 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 4237 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 4238 set_cm_oop_closure(&cm_oop_closure); 4239 4240 if (_cm->has_overflown()) { 4241 // This can happen if the mark stack overflows during a GC pause 4242 // and this task, after a yield point, restarts. We have to abort 4243 // as we need to get into the overflow protocol which happens 4244 // right at the end of this task. 4245 set_has_aborted(); 4246 } 4247 4248 // First drain any available SATB buffers. After this, we will not 4249 // look at SATB buffers before the next invocation of this method. 4250 // If enough completed SATB buffers are queued up, the regular clock 4251 // will abort this task so that it restarts. 4252 drain_satb_buffers(); 4253 // ...then partially drain the local queue and the global stack 4254 drain_local_queue(true); 4255 drain_global_stack(true); 4256 4257 do { 4258 if (!has_aborted() && _curr_region != NULL) { 4259 // This means that we're already holding on to a region. 
4260 assert(_finger != NULL, "if region is not NULL, then the finger " 4261 "should not be NULL either"); 4262 4263 // We might have restarted this task after an evacuation pause 4264 // which might have evacuated the region we're holding on to 4265 // underneath our feet. Let's read its limit again to make sure 4266 // that we do not iterate over a region of the heap that 4267 // contains garbage (update_region_limit() will also move 4268 // _finger to the start of the region if it is found empty). 4269 update_region_limit(); 4270 // We will start from _finger not from the start of the region, 4271 // as we might be restarting this task after aborting half-way 4272 // through scanning this region. In this case, _finger points to 4273 // the address where we last found a marked object. If this is a 4274 // fresh region, _finger points to start(). 4275 MemRegion mr = MemRegion(_finger, _region_limit); 4276 4277 if (_cm->verbose_low()) { 4278 gclog_or_tty->print_cr("[%u] we're scanning part " 4279 "["PTR_FORMAT", "PTR_FORMAT") " 4280 "of region "HR_FORMAT, 4281 _worker_id, p2i(_finger), p2i(_region_limit), 4282 HR_FORMAT_PARAMS(_curr_region)); 4283 } 4284 4285 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4286 "humongous regions should go around loop once only"); 4287 4288 // Some special cases: 4289 // If the memory region is empty, we can just give up the region. 4290 // If the current region is humongous then we only need to check 4291 // the bitmap for the bit associated with the start of the object, 4292 // scan the object if it's live, and give up the region. 4293 // Otherwise, let's iterate over the bitmap of the part of the region 4294 // that is left. 4295 // If the iteration is successful, give up the region. 
4296 if (mr.is_empty()) { 4297 giveup_current_region(); 4298 regular_clock_call(); 4299 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4300 if (_nextMarkBitMap->isMarked(mr.start())) { 4301 // The object is marked - apply the closure 4302 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4303 bitmap_closure.do_bit(offset); 4304 } 4305 // Even if this task aborted while scanning the humongous object 4306 // we can (and should) give up the current region. 4307 giveup_current_region(); 4308 regular_clock_call(); 4309 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4310 giveup_current_region(); 4311 regular_clock_call(); 4312 } else { 4313 assert(has_aborted(), "currently the only way to do so"); 4314 // The only way to abort the bitmap iteration is to return 4315 // false from the do_bit() method. However, inside the 4316 // do_bit() method we move the _finger to point to the 4317 // object currently being looked at. So, if we bail out, we 4318 // have definitely set _finger to something non-null. 4319 assert(_finger != NULL, "invariant"); 4320 4321 // Region iteration was actually aborted. So now _finger 4322 // points to the address of the object we last scanned. If we 4323 // leave it there, when we restart this task, we will rescan 4324 // the object. It is easy to avoid this. We move the finger by 4325 // enough to point to the next possible object header (the 4326 // bitmap knows by how much we need to move it as it knows its 4327 // granularity). 4328 assert(_finger < _region_limit, "invariant"); 4329 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4330 // Check if bitmap iteration was aborted while scanning the last object 4331 if (new_finger >= _region_limit) { 4332 giveup_current_region(); 4333 } else { 4334 move_finger_to(new_finger); 4335 } 4336 } 4337 } 4338 // At this point we have either completed iterating over the 4339 // region we were holding on to, or we have aborted. 
4340 4341 // We then partially drain the local queue and the global stack. 4342 // (Do we really need this?) 4343 drain_local_queue(true); 4344 drain_global_stack(true); 4345 4346 // Read the note on the claim_region() method on why it might 4347 // return NULL with potentially more regions available for 4348 // claiming and why we have to check out_of_regions() to determine 4349 // whether we're done or not. 4350 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4351 // We are going to try to claim a new region. We should have 4352 // given up on the previous one. 4353 // Separated the asserts so that we know which one fires. 4354 assert(_curr_region == NULL, "invariant"); 4355 assert(_finger == NULL, "invariant"); 4356 assert(_region_limit == NULL, "invariant"); 4357 if (_cm->verbose_low()) { 4358 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4359 } 4360 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4361 if (claimed_region != NULL) { 4362 // Yes, we managed to claim one 4363 statsOnly( ++_regions_claimed ); 4364 4365 if (_cm->verbose_low()) { 4366 gclog_or_tty->print_cr("[%u] we successfully claimed " 4367 "region "PTR_FORMAT, 4368 _worker_id, p2i(claimed_region)); 4369 } 4370 4371 setup_for_region(claimed_region); 4372 assert(_curr_region == claimed_region, "invariant"); 4373 } 4374 // It is important to call the regular clock here. It might take 4375 // a while to claim a region if, for example, we hit a large 4376 // block of empty regions. So we need to call the regular clock 4377 // method once round the loop to make sure it's called 4378 // frequently enough. 
4379 regular_clock_call(); 4380 } 4381 4382 if (!has_aborted() && _curr_region == NULL) { 4383 assert(_cm->out_of_regions(), 4384 "at this point we should be out of regions"); 4385 } 4386 } while ( _curr_region != NULL && !has_aborted()); 4387 4388 if (!has_aborted()) { 4389 // We cannot check whether the global stack is empty, since other 4390 // tasks might be pushing objects to it concurrently. 4391 assert(_cm->out_of_regions(), 4392 "at this point we should be out of regions"); 4393 4394 if (_cm->verbose_low()) { 4395 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4396 } 4397 4398 // Try to reduce the number of available SATB buffers so that 4399 // remark has less work to do. 4400 drain_satb_buffers(); 4401 } 4402 4403 // Since we've done everything else, we can now totally drain the 4404 // local queue and global stack. 4405 drain_local_queue(false); 4406 drain_global_stack(false); 4407 4408 // Attempt at work stealing from other task's queues. 4409 if (do_stealing && !has_aborted()) { 4410 // We have not aborted. This means that we have finished all that 4411 // we could. Let's try to do some stealing... 4412 4413 // We cannot check whether the global stack is empty, since other 4414 // tasks might be pushing objects to it concurrently. 
4415 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4416 "only way to reach here"); 4417 4418 if (_cm->verbose_low()) { 4419 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4420 } 4421 4422 while (!has_aborted()) { 4423 oop obj; 4424 statsOnly( ++_steal_attempts ); 4425 4426 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4427 if (_cm->verbose_medium()) { 4428 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4429 _worker_id, p2i((void*) obj)); 4430 } 4431 4432 statsOnly( ++_steals ); 4433 4434 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4435 "any stolen object should be marked"); 4436 scan_object(obj); 4437 4438 // And since we're towards the end, let's totally drain the 4439 // local queue and global stack. 4440 drain_local_queue(false); 4441 drain_global_stack(false); 4442 } else { 4443 break; 4444 } 4445 } 4446 } 4447 4448 // If we are about to wrap up and go into termination, check if we 4449 // should raise the overflow flag. 4450 if (do_termination && !has_aborted()) { 4451 if (_cm->force_overflow()->should_force()) { 4452 _cm->set_has_overflown(); 4453 regular_clock_call(); 4454 } 4455 } 4456 4457 // We still haven't aborted. Now, let's try to get into the 4458 // termination protocol. 4459 if (do_termination && !has_aborted()) { 4460 // We cannot check whether the global stack is empty, since other 4461 // tasks might be concurrently pushing objects on it. 4462 // Separated the asserts so that we know which one fires. 
4463 assert(_cm->out_of_regions(), "only way to reach here"); 4464 assert(_task_queue->size() == 0, "only way to reach here"); 4465 4466 if (_cm->verbose_low()) { 4467 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4468 } 4469 4470 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4471 4472 // The CMTask class also extends the TerminatorTerminator class, 4473 // hence its should_exit_termination() method will also decide 4474 // whether to exit the termination protocol or not. 4475 bool finished = (is_serial || 4476 _cm->terminator()->offer_termination(this)); 4477 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4478 _termination_time_ms += 4479 termination_end_time_ms - _termination_start_time_ms; 4480 4481 if (finished) { 4482 // We're all done. 4483 4484 if (_worker_id == 0) { 4485 // let's allow task 0 to do this 4486 if (concurrent()) { 4487 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4488 // we need to set this to false before the next 4489 // safepoint. This way we ensure that the marking phase 4490 // doesn't observe any more heap expansions. 4491 _cm->clear_concurrent_marking_in_progress(); 4492 } 4493 } 4494 4495 // We can now guarantee that the global stack is empty, since 4496 // all other tasks have finished. We separated the guarantees so 4497 // that, if a condition is false, we can immediately find out 4498 // which one. 4499 guarantee(_cm->out_of_regions(), "only way to reach here"); 4500 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4501 guarantee(_task_queue->size() == 0, "only way to reach here"); 4502 guarantee(!_cm->has_overflown(), "only way to reach here"); 4503 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4504 4505 if (_cm->verbose_low()) { 4506 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4507 } 4508 } else { 4509 // Apparently there's more work to do. Let's abort this task. 
It 4510 // will restart it and we can hopefully find more things to do. 4511 4512 if (_cm->verbose_low()) { 4513 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4514 _worker_id); 4515 } 4516 4517 set_has_aborted(); 4518 statsOnly( ++_aborted_termination ); 4519 } 4520 } 4521 4522 // Mainly for debugging purposes to make sure that a pointer to the 4523 // closure which was statically allocated in this frame doesn't 4524 // escape it by accident. 4525 set_cm_oop_closure(NULL); 4526 double end_time_ms = os::elapsedVTime() * 1000.0; 4527 double elapsed_time_ms = end_time_ms - _start_time_ms; 4528 // Update the step history. 4529 _step_times_ms.add(elapsed_time_ms); 4530 4531 if (has_aborted()) { 4532 // The task was aborted for some reason. 4533 4534 statsOnly( ++_aborted ); 4535 4536 if (_has_timed_out) { 4537 double diff_ms = elapsed_time_ms - _time_target_ms; 4538 // Keep statistics of how well we did with respect to hitting 4539 // our target only if we actually timed out (if we aborted for 4540 // other reasons, then the results might get skewed). 4541 _marking_step_diffs_ms.add(diff_ms); 4542 } 4543 4544 if (_cm->has_overflown()) { 4545 // This is the interesting one. We aborted because a global 4546 // overflow was raised. This means we have to restart the 4547 // marking phase and start iterating over regions. However, in 4548 // order to do this we have to make sure that all tasks stop 4549 // what they are doing and re-initialize in a safe manner. We 4550 // will achieve this with the use of two barrier sync points. 4551 4552 if (_cm->verbose_low()) { 4553 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4554 } 4555 4556 if (!is_serial) { 4557 // We only need to enter the sync barrier if being called 4558 // from a parallel context 4559 _cm->enter_first_sync_barrier(_worker_id); 4560 4561 // When we exit this sync barrier we know that all tasks have 4562 // stopped doing marking work. 
So, it's now safe to 4563 // re-initialize our data structures. At the end of this method, 4564 // task 0 will clear the global data structures. 4565 } 4566 4567 statsOnly( ++_aborted_overflow ); 4568 4569 // We clear the local state of this task... 4570 clear_region_fields(); 4571 4572 if (!is_serial) { 4573 // ...and enter the second barrier. 4574 _cm->enter_second_sync_barrier(_worker_id); 4575 } 4576 // At this point, if we're during the concurrent phase of 4577 // marking, everything has been re-initialized and we're 4578 // ready to restart. 4579 } 4580 4581 if (_cm->verbose_low()) { 4582 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4583 "elapsed = %1.2lfms <<<<<<<<<<", 4584 _worker_id, _time_target_ms, elapsed_time_ms); 4585 if (_cm->has_aborted()) { 4586 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4587 _worker_id); 4588 } 4589 } 4590 } else { 4591 if (_cm->verbose_low()) { 4592 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4593 "elapsed = %1.2lfms <<<<<<<<<<", 4594 _worker_id, _time_target_ms, elapsed_time_ms); 4595 } 4596 } 4597 4598 _claimed = false; 4599 } 4600 4601 CMTask::CMTask(uint worker_id, 4602 ConcurrentMark* cm, 4603 size_t* marked_bytes, 4604 BitMap* card_bm, 4605 CMTaskQueue* task_queue, 4606 CMTaskQueueSet* task_queues) 4607 : _g1h(G1CollectedHeap::heap()), 4608 _worker_id(worker_id), _cm(cm), 4609 _claimed(false), 4610 _nextMarkBitMap(NULL), _hash_seed(17), 4611 _task_queue(task_queue), 4612 _task_queues(task_queues), 4613 _cm_oop_closure(NULL), 4614 _marked_bytes_array(marked_bytes), 4615 _card_bm(card_bm) { 4616 guarantee(task_queue != NULL, "invariant"); 4617 guarantee(task_queues != NULL, "invariant"); 4618 4619 statsOnly( _clock_due_to_scanning = 0; 4620 _clock_due_to_marking = 0 ); 4621 4622 _marking_step_diffs_ms.add(0.5); 4623 } 4624 4625 // These are formatting macros that are used below to ensure 4626 // consistent formatting. 
The *_H_* versions are used to format the 4627 // header for a particular value and they should be kept consistent 4628 // with the corresponding macro. Also note that most of the macros add 4629 // the necessary white space (as a prefix) which makes them a bit 4630 // easier to compose. 4631 4632 // All the output lines are prefixed with this string to be able to 4633 // identify them easily in a large log file. 4634 #define G1PPRL_LINE_PREFIX "###" 4635 4636 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4637 #ifdef _LP64 4638 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4639 #else // _LP64 4640 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4641 #endif // _LP64 4642 4643 // For per-region info 4644 #define G1PPRL_TYPE_FORMAT " %-4s" 4645 #define G1PPRL_TYPE_H_FORMAT " %4s" 4646 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4647 #define G1PPRL_BYTE_H_FORMAT " %9s" 4648 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4649 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4650 4651 // For summary info 4652 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4653 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4654 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4655 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4656 4657 G1PrintRegionLivenessInfoClosure:: 4658 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4659 : _out(out), 4660 _total_used_bytes(0), _total_capacity_bytes(0), 4661 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4662 _hum_used_bytes(0), _hum_capacity_bytes(0), 4663 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4664 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4665 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4666 MemRegion g1_committed = g1h->g1_committed(); 4667 MemRegion g1_reserved = g1h->g1_reserved(); 4668 double now = os::elapsedTime(); 4669 4670 // Print the header of the output. 
4671 _out->cr(); 4672 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 4673 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 4674 G1PPRL_SUM_ADDR_FORMAT("committed") 4675 G1PPRL_SUM_ADDR_FORMAT("reserved") 4676 G1PPRL_SUM_BYTE_FORMAT("region-size"), 4677 p2i(g1_committed.start()), p2i(g1_committed.end()), 4678 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 4679 HeapRegion::GrainBytes); 4680 _out->print_cr(G1PPRL_LINE_PREFIX); 4681 _out->print_cr(G1PPRL_LINE_PREFIX 4682 G1PPRL_TYPE_H_FORMAT 4683 G1PPRL_ADDR_BASE_H_FORMAT 4684 G1PPRL_BYTE_H_FORMAT 4685 G1PPRL_BYTE_H_FORMAT 4686 G1PPRL_BYTE_H_FORMAT 4687 G1PPRL_DOUBLE_H_FORMAT 4688 G1PPRL_BYTE_H_FORMAT 4689 G1PPRL_BYTE_H_FORMAT, 4690 "type", "address-range", 4691 "used", "prev-live", "next-live", "gc-eff", 4692 "remset", "code-roots"); 4693 _out->print_cr(G1PPRL_LINE_PREFIX 4694 G1PPRL_TYPE_H_FORMAT 4695 G1PPRL_ADDR_BASE_H_FORMAT 4696 G1PPRL_BYTE_H_FORMAT 4697 G1PPRL_BYTE_H_FORMAT 4698 G1PPRL_BYTE_H_FORMAT 4699 G1PPRL_DOUBLE_H_FORMAT 4700 G1PPRL_BYTE_H_FORMAT 4701 G1PPRL_BYTE_H_FORMAT, 4702 "", "", 4703 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 4704 "(bytes)", "(bytes)"); 4705 } 4706 4707 // It takes as a parameter a reference to one of the _hum_* fields, it 4708 // deduces the corresponding value for a region in a humongous region 4709 // series (either the region size, or what's left if the _hum_* field 4710 // is < the region size), and updates the _hum_* field accordingly. 4711 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 4712 size_t bytes = 0; 4713 // The > 0 check is to deal with the prev and next live bytes which 4714 // could be 0. 4715 if (*hum_bytes > 0) { 4716 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes); 4717 *hum_bytes -= bytes; 4718 } 4719 return bytes; 4720 } 4721 4722 // It deduces the values for a region in a humongous region series 4723 // from the _hum_* fields and updates those accordingly. 
It assumes 4724 // that that _hum_* fields have already been set up from the "starts 4725 // humongous" region and we visit the regions in address order. 4726 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4727 size_t* capacity_bytes, 4728 size_t* prev_live_bytes, 4729 size_t* next_live_bytes) { 4730 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4731 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4732 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4733 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4734 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4735 } 4736 4737 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4738 const char* type = ""; 4739 HeapWord* bottom = r->bottom(); 4740 HeapWord* end = r->end(); 4741 size_t capacity_bytes = r->capacity(); 4742 size_t used_bytes = r->used(); 4743 size_t prev_live_bytes = r->live_bytes(); 4744 size_t next_live_bytes = r->next_live_bytes(); 4745 double gc_eff = r->gc_efficiency(); 4746 size_t remset_bytes = r->rem_set()->mem_size(); 4747 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 4748 4749 if (r->used() == 0) { 4750 type = "FREE"; 4751 } else if (r->is_survivor()) { 4752 type = "SURV"; 4753 } else if (r->is_young()) { 4754 type = "EDEN"; 4755 } else if (r->startsHumongous()) { 4756 type = "HUMS"; 4757 4758 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4759 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4760 "they should have been zeroed after the last time we used them"); 4761 // Set up the _hum_* fields. 
4762 _hum_capacity_bytes = capacity_bytes; 4763 _hum_used_bytes = used_bytes; 4764 _hum_prev_live_bytes = prev_live_bytes; 4765 _hum_next_live_bytes = next_live_bytes; 4766 get_hum_bytes(&used_bytes, &capacity_bytes, 4767 &prev_live_bytes, &next_live_bytes); 4768 end = bottom + HeapRegion::GrainWords; 4769 } else if (r->continuesHumongous()) { 4770 type = "HUMC"; 4771 get_hum_bytes(&used_bytes, &capacity_bytes, 4772 &prev_live_bytes, &next_live_bytes); 4773 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4774 } else { 4775 type = "OLD"; 4776 } 4777 4778 _total_used_bytes += used_bytes; 4779 _total_capacity_bytes += capacity_bytes; 4780 _total_prev_live_bytes += prev_live_bytes; 4781 _total_next_live_bytes += next_live_bytes; 4782 _total_remset_bytes += remset_bytes; 4783 _total_strong_code_roots_bytes += strong_code_roots_bytes; 4784 4785 // Print a line for this particular region. 4786 _out->print_cr(G1PPRL_LINE_PREFIX 4787 G1PPRL_TYPE_FORMAT 4788 G1PPRL_ADDR_BASE_FORMAT 4789 G1PPRL_BYTE_FORMAT 4790 G1PPRL_BYTE_FORMAT 4791 G1PPRL_BYTE_FORMAT 4792 G1PPRL_DOUBLE_FORMAT 4793 G1PPRL_BYTE_FORMAT 4794 G1PPRL_BYTE_FORMAT, 4795 type, p2i(bottom), p2i(end), 4796 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 4797 remset_bytes, strong_code_roots_bytes); 4798 4799 return false; 4800 } 4801 4802 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4803 // add static memory usages to remembered set sizes 4804 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 4805 // Print the footer of the output. 
4806 _out->print_cr(G1PPRL_LINE_PREFIX); 4807 _out->print_cr(G1PPRL_LINE_PREFIX 4808 " SUMMARY" 4809 G1PPRL_SUM_MB_FORMAT("capacity") 4810 G1PPRL_SUM_MB_PERC_FORMAT("used") 4811 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4812 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4813 G1PPRL_SUM_MB_FORMAT("remset") 4814 G1PPRL_SUM_MB_FORMAT("code-roots"), 4815 bytes_to_mb(_total_capacity_bytes), 4816 bytes_to_mb(_total_used_bytes), 4817 perc(_total_used_bytes, _total_capacity_bytes), 4818 bytes_to_mb(_total_prev_live_bytes), 4819 perc(_total_prev_live_bytes, _total_capacity_bytes), 4820 bytes_to_mb(_total_next_live_bytes), 4821 perc(_total_next_live_bytes, _total_capacity_bytes), 4822 bytes_to_mb(_total_remset_bytes), 4823 bytes_to_mb(_total_strong_code_roots_bytes)); 4824 _out->cr(); 4825 }