/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/allocation.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
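  // Each bit in the map covers 2^_shifter heap words (with the default
  // MinObjAlignment the shifter is 0, i.e. one bit per heap word), so
  // addr has to sit on a (HeapWordSize << _shifter) byte boundary before
  // it can be translated into a bit offset below.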
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
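  // The reservation above is sized at one bit per 2^_shifter heap words,
  // i.e. _bmWordSize >> (_shifter + LogBitsPerByte) bytes, plus one byte
  // of slack for rounding. As a worked example: a 1 GB heap with 8-byte
  // heap words and _shifter == 0 needs 2^27 bits, i.e. 16 MB of bitmap.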
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflown the marking stack during marking.
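  // Expansion policy, in brief: capacity is doubled, up to
  // MarkStackSizeMax. The old backing store is released only once the
  // new, larger reservation has succeeded; if the reservation fails we
  // keep the current stack and simply carry on.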
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
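  // Unlike par_push() and par_adjoin_arr() above, which claim slots with
  // a CAS retry loop, this bulk push runs under ParGCRareEvent_lock, so
  // a plain bump of _index is sufficient here.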
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
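  // Double-checked claiming: read _next_survivor racily first, and only
  // take RootRegionScan_lock (re-reading the field under it) when there
  // still appears to be a region left to claim.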
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(log2_intptr(MinObjAlignment)),
  _markBitMap2(log2_intptr(MinObjAlignment)),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads     = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor                 = 0.0;
    _marking_task_overhead        = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor          = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
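      // scale_parallel_threads() returns MAX2((n + 2) / 4, 1U); for
      // example, 8 parallel GC threads yield 2 concurrent marking
      // threads, while anything below 3 still yields 1.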
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
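    // MarkStackSize was set explicitly rather than ergonomically; the
    // checks below only validate it when it actually came from the
    // command line.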
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause could occur. This
 * is actually safe, since entering the sync barrier is one of the
 * last things do_marking_step() does, and it doesn't manipulate any
 * data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
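  // Root regions (currently the survivor regions of the initial-mark
  // pause) are scanned before the next evacuation pause is allowed to
  // start, so that objects they reference are marked before they can be
  // moved; regions are handed out via CMRootRegions::claim_next() above.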
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap*   _g1h;
  ConcurrentMark*    _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
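// It walks the next marking bitmap from bottom() up to NTAMS, summing
// the sizes of the marked objects it finds into _region_marked_bytes and
// setting the corresponding bits in the expected card bitmap, so that
// the result can be compared with the data accumulated during marking.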
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
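    // Note the asymmetry in both checks: the data accumulated during
    // marking is allowed to be a conservative over-approximation, so an
    // actual bit without a matching expected bit is fine; only a missing
    // actual bit, or under-counted marked bytes, is treated as a failure.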
1598 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1599 1600 bool expected = _exp_region_bm->at(index); 1601 bool actual = _region_bm->at(index); 1602 if (expected && !actual) { 1603 if (_verbose) { 1604 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1605 "expected: %s, actual: %s", 1606 hr->hrs_index(), 1607 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1608 } 1609 failures += 1; 1610 } 1611 1612 // Verify that the card bit maps for the cards spanned by the current 1613 // region match. We have an error if we have a set bit in the expected 1614 // bit map and the corresponding bit in the actual bitmap is not set. 1615 1616 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1617 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1618 1619 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1620 expected = _exp_card_bm->at(i); 1621 actual = _card_bm->at(i); 1622 1623 if (expected && !actual) { 1624 if (_verbose) { 1625 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1626 "expected: %s, actual: %s", 1627 hr->hrs_index(), i, 1628 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1629 } 1630 failures += 1; 1631 } 1632 } 1633 1634 if (failures > 0 && _verbose) { 1635 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1636 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1637 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), 1638 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1639 } 1640 1641 _failures += failures; 1642 1643 // We could stop iteration over the heap when we 1644 // find the first violating region by returning true. 1645 return false; 1646 } 1647 }; 1648 1649 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1650 protected: 1651 G1CollectedHeap* _g1h; 1652 ConcurrentMark* _cm; 1653 BitMap* _actual_region_bm; 1654 BitMap* _actual_card_bm; 1655 1656 uint _n_workers; 1657 1658 BitMap* _expected_region_bm; 1659 BitMap* _expected_card_bm; 1660 1661 int _failures; 1662 bool _verbose; 1663 1664 public: 1665 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1666 BitMap* region_bm, BitMap* card_bm, 1667 BitMap* expected_region_bm, BitMap* expected_card_bm) 1668 : AbstractGangTask("G1 verify final counting"), 1669 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1670 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1671 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1672 _failures(0), _verbose(false), 1673 _n_workers(0) { 1674 assert(VerifyDuringGC, "don't call this otherwise"); 1675 1676 // Use the value already set as the number of active threads 1677 // in the call to run_task(). 
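    // (I.e. the expected sequence at the call site is, roughly:
    //
    //   g1h->set_par_threads((int)n_workers);
    //   g1h->workers()->run_task(&task);   // workers()->active_workers()
    //   g1h->set_par_threads(0);           // is already in place by now
    //
    // as done in ConcurrentMark::cleanup() below; a sketch of the existing
    // call sites, not new code.)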
1678 if (G1CollectedHeap::use_parallel_gc_threads()) { 1679 assert( _g1h->workers()->active_workers() > 0, 1680 "Should have been previously set"); 1681 _n_workers = _g1h->workers()->active_workers(); 1682 } else { 1683 _n_workers = 1; 1684 } 1685 1686 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1687 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1688 1689 _verbose = _cm->verbose_medium(); 1690 } 1691 1692 void work(uint worker_id) { 1693 assert(worker_id < _n_workers, "invariant"); 1694 1695 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1696 _actual_region_bm, _actual_card_bm, 1697 _expected_region_bm, 1698 _expected_card_bm, 1699 _verbose); 1700 1701 if (G1CollectedHeap::use_parallel_gc_threads()) { 1702 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1703 worker_id, 1704 _n_workers, 1705 HeapRegion::VerifyCountClaimValue); 1706 } else { 1707 _g1h->heap_region_iterate(&verify_cl); 1708 } 1709 1710 Atomic::add(verify_cl.failures(), &_failures); 1711 } 1712 1713 int failures() const { return _failures; } 1714 }; 1715 1716 // Closure that finalizes the liveness counting data. 1717 // Used during the cleanup pause. 1718 // Sets the bits corresponding to the interval [NTAMS, top] 1719 // (which contains the implicitly live objects) in the 1720 // card liveness bitmap. Also sets the bit for each region, 1721 // containing live data, in the region liveness bitmap. 1722 1723 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1724 public: 1725 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1726 BitMap* region_bm, 1727 BitMap* card_bm) : 1728 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1729 1730 bool doHeapRegion(HeapRegion* hr) { 1731 1732 if (hr->continuesHumongous()) { 1733 // We will ignore these here and process them when their 1734 // associated "starts humongous" region is processed (see 1735 // set_bit_for_heap_region()). Note that we cannot rely on their 1736 // associated "starts humongous" region to have their bit set to 1737 // 1 since, due to the region chunking in the parallel region 1738 // iteration, a "continues humongous" region might be visited 1739 // before its associated "starts humongous". 1740 return false; 1741 } 1742 1743 HeapWord* ntams = hr->next_top_at_mark_start(); 1744 HeapWord* top = hr->top(); 1745 1746 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1747 1748 // Mark the allocated-since-marking portion... 1749 if (ntams < top) { 1750 // This definitely means the region has live objects. 1751 set_bit_for_region(hr); 1752 1753 // Now set the bits in the card bitmap for [ntams, top) 1754 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1755 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1756 1757 // Note: if we're looking at the last region in heap - top 1758 // could be actually just beyond the end of the heap; end_idx 1759 // will then correspond to a (non-existent) card that is also 1760 // just beyond the heap. 
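      // As an illustration only (assuming HotSpot's usual 512-byte cards):
      // if ntams maps to card index 10 and top lies 700 bytes beyond ntams,
      // top falls inside card 11, so card_bitmap_index_for(top) yields 11;
      // the bump below makes the exclusive end 12 so that card 11 is
      // covered as well.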
1761 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1762 // end of object is not card aligned - increment to cover 1763 // all the cards spanned by the object 1764 end_idx += 1; 1765 } 1766 1767 assert(end_idx <= _card_bm->size(), 1768 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1769 end_idx, _card_bm->size())); 1770 assert(start_idx < _card_bm->size(), 1771 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1772 start_idx, _card_bm->size())); 1773 1774 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1775 } 1776 1777 // Set the bit for the region if it contains live data 1778 if (hr->next_marked_bytes() > 0) { 1779 set_bit_for_region(hr); 1780 } 1781 1782 return false; 1783 } 1784 }; 1785 1786 class G1ParFinalCountTask: public AbstractGangTask { 1787 protected: 1788 G1CollectedHeap* _g1h; 1789 ConcurrentMark* _cm; 1790 BitMap* _actual_region_bm; 1791 BitMap* _actual_card_bm; 1792 1793 uint _n_workers; 1794 1795 public: 1796 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1797 : AbstractGangTask("G1 final counting"), 1798 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1799 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1800 _n_workers(0) { 1801 // Use the value already set as the number of active threads 1802 // in the call to run_task(). 1803 if (G1CollectedHeap::use_parallel_gc_threads()) { 1804 assert( _g1h->workers()->active_workers() > 0, 1805 "Should have been previously set"); 1806 _n_workers = _g1h->workers()->active_workers(); 1807 } else { 1808 _n_workers = 1; 1809 } 1810 } 1811 1812 void work(uint worker_id) { 1813 assert(worker_id < _n_workers, "invariant"); 1814 1815 FinalCountDataUpdateClosure final_update_cl(_g1h, 1816 _actual_region_bm, 1817 _actual_card_bm); 1818 1819 if (G1CollectedHeap::use_parallel_gc_threads()) { 1820 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1821 worker_id, 1822 _n_workers, 1823 HeapRegion::FinalCountClaimValue); 1824 } else { 1825 _g1h->heap_region_iterate(&final_update_cl); 1826 } 1827 } 1828 }; 1829 1830 class G1ParNoteEndTask; 1831 1832 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1833 G1CollectedHeap* _g1; 1834 size_t _max_live_bytes; 1835 uint _regions_claimed; 1836 size_t _freed_bytes; 1837 FreeRegionList* _local_cleanup_list; 1838 HeapRegionSetCount _old_regions_removed; 1839 HeapRegionSetCount _humongous_regions_removed; 1840 HRRSCleanupTask* _hrrs_cleanup_task; 1841 double _claimed_region_time; 1842 double _max_region_time; 1843 1844 public: 1845 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1846 FreeRegionList* local_cleanup_list, 1847 HRRSCleanupTask* hrrs_cleanup_task) : 1848 _g1(g1), 1849 _max_live_bytes(0), _regions_claimed(0), 1850 _freed_bytes(0), 1851 _claimed_region_time(0.0), _max_region_time(0.0), 1852 _local_cleanup_list(local_cleanup_list), 1853 _old_regions_removed(), 1854 _humongous_regions_removed(), 1855 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1856 1857 size_t freed_bytes() { return _freed_bytes; } 1858 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1859 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1860 1861 bool doHeapRegion(HeapRegion *hr) { 1862 if (hr->continuesHumongous()) { 1863 return false; 1864 } 1865 // We use a claim value of zero here because all regions 1866 // were claimed with value 1 in the FinalCount task. 
1867 _g1->reset_gc_time_stamps(hr); 1868 double start = os::elapsedTime(); 1869 _regions_claimed++; 1870 hr->note_end_of_marking(); 1871 _max_live_bytes += hr->max_live_bytes(); 1872 1873 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1874 _freed_bytes += hr->used(); 1875 hr->set_containing_set(NULL); 1876 if (hr->isHumongous()) { 1877 assert(hr->startsHumongous(), "we should only see starts humongous"); 1878 _humongous_regions_removed.increment(1u, hr->capacity()); 1879 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1880 } else { 1881 _old_regions_removed.increment(1u, hr->capacity()); 1882 _g1->free_region(hr, _local_cleanup_list, true); 1883 } 1884 } else { 1885 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1886 } 1887 1888 double region_time = (os::elapsedTime() - start); 1889 _claimed_region_time += region_time; 1890 if (region_time > _max_region_time) { 1891 _max_region_time = region_time; 1892 } 1893 return false; 1894 } 1895 1896 size_t max_live_bytes() { return _max_live_bytes; } 1897 uint regions_claimed() { return _regions_claimed; } 1898 double claimed_region_time_sec() { return _claimed_region_time; } 1899 double max_region_time_sec() { return _max_region_time; } 1900 }; 1901 1902 class G1ParNoteEndTask: public AbstractGangTask { 1903 friend class G1NoteEndOfConcMarkClosure; 1904 1905 protected: 1906 G1CollectedHeap* _g1h; 1907 size_t _max_live_bytes; 1908 size_t _freed_bytes; 1909 FreeRegionList* _cleanup_list; 1910 1911 public: 1912 G1ParNoteEndTask(G1CollectedHeap* g1h, 1913 FreeRegionList* cleanup_list) : 1914 AbstractGangTask("G1 note end"), _g1h(g1h), 1915 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1916 1917 void work(uint worker_id) { 1918 double start = os::elapsedTime(); 1919 FreeRegionList local_cleanup_list("Local Cleanup List"); 1920 HRRSCleanupTask hrrs_cleanup_task; 1921 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1922 &hrrs_cleanup_task); 1923 if (G1CollectedHeap::use_parallel_gc_threads()) { 1924 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1925 _g1h->workers()->active_workers(), 1926 HeapRegion::NoteEndClaimValue); 1927 } else { 1928 _g1h->heap_region_iterate(&g1_note_end); 1929 } 1930 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1931 1932 // Now update the lists 1933 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1934 { 1935 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1936 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1937 _max_live_bytes += g1_note_end.max_live_bytes(); 1938 _freed_bytes += g1_note_end.freed_bytes(); 1939 1940 // If we iterate over the global cleanup list at the end of 1941 // cleanup to do this printing we will not guarantee to only 1942 // generate output for the newly-reclaimed regions (the list 1943 // might not be empty at the beginning of cleanup; we might 1944 // still be working on its previous contents). So we do the 1945 // printing here, before we append the new regions to the global 1946 // cleanup list. 
1947 1948 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1949 if (hr_printer->is_active()) { 1950 FreeRegionListIterator iter(&local_cleanup_list); 1951 while (iter.more_available()) { 1952 HeapRegion* hr = iter.get_next(); 1953 hr_printer->cleanup(hr); 1954 } 1955 } 1956 1957 _cleanup_list->add_ordered(&local_cleanup_list); 1958 assert(local_cleanup_list.is_empty(), "post-condition"); 1959 1960 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1961 } 1962 } 1963 size_t max_live_bytes() { return _max_live_bytes; } 1964 size_t freed_bytes() { return _freed_bytes; } 1965 }; 1966 1967 class G1ParScrubRemSetTask: public AbstractGangTask { 1968 protected: 1969 G1RemSet* _g1rs; 1970 BitMap* _region_bm; 1971 BitMap* _card_bm; 1972 public: 1973 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1974 BitMap* region_bm, BitMap* card_bm) : 1975 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1976 _region_bm(region_bm), _card_bm(card_bm) { } 1977 1978 void work(uint worker_id) { 1979 if (G1CollectedHeap::use_parallel_gc_threads()) { 1980 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1981 HeapRegion::ScrubRemSetClaimValue); 1982 } else { 1983 _g1rs->scrub(_region_bm, _card_bm); 1984 } 1985 } 1986 1987 }; 1988 1989 void ConcurrentMark::cleanup() { 1990 // world is stopped at this checkpoint 1991 assert(SafepointSynchronize::is_at_safepoint(), 1992 "world should be stopped"); 1993 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1994 1995 // If a full collection has happened, we shouldn't do this. 1996 if (has_aborted()) { 1997 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1998 return; 1999 } 2000 2001 g1h->verify_region_sets_optional(); 2002 2003 if (VerifyDuringGC) { 2004 HandleMark hm; // handle scope 2005 Universe::heap()->prepare_for_verify(); 2006 Universe::verify(VerifyOption_G1UsePrevMarking, 2007 " VerifyDuringGC:(before)"); 2008 } 2009 g1h->check_bitmaps("Cleanup Start"); 2010 2011 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2012 g1p->record_concurrent_mark_cleanup_start(); 2013 2014 double start = os::elapsedTime(); 2015 2016 HeapRegionRemSet::reset_for_cleanup_tasks(); 2017 2018 uint n_workers; 2019 2020 // Do counting once more with the world stopped for good measure. 2021 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2022 2023 if (G1CollectedHeap::use_parallel_gc_threads()) { 2024 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2025 "sanity check"); 2026 2027 g1h->set_par_threads(); 2028 n_workers = g1h->n_par_threads(); 2029 assert(g1h->n_par_threads() == n_workers, 2030 "Should not have been reset"); 2031 g1h->workers()->run_task(&g1_par_count_task); 2032 // Done with the parallel phase so reset to 0. 2033 g1h->set_par_threads(0); 2034 2035 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2036 "sanity check"); 2037 } else { 2038 n_workers = 1; 2039 g1_par_count_task.work(0); 2040 } 2041 2042 if (VerifyDuringGC) { 2043 // Verify that the counting data accumulated during marking matches 2044 // that calculated by walking the marking bitmap. 
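    //
    // Conceptually, the verification below is just (a sketch, not code
    // that is compiled here):
    //
    //   expected := recount liveness from the mark bitmap
    //               (CalcLiveObjectsClosure, into expected_region_bm
    //                and expected_card_bm)
    //   check that every bit set in "expected" is also set in the
    //   "actual" counting data (VerifyLiveObjectDataHRClosure)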
2045
2046     // Bitmaps to hold expected values
2047     BitMap expected_region_bm(_region_bm.size(), true);
2048     BitMap expected_card_bm(_card_bm.size(), true);
2049
2050     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2051                                                  &_region_bm,
2052                                                  &_card_bm,
2053                                                  &expected_region_bm,
2054                                                  &expected_card_bm);
2055
2056     if (G1CollectedHeap::use_parallel_gc_threads()) {
2057       g1h->set_par_threads((int)n_workers);
2058       g1h->workers()->run_task(&g1_par_verify_task);
2059       // Done with the parallel phase so reset to 0.
2060       g1h->set_par_threads(0);
2061
2062       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2063              "sanity check");
2064     } else {
2065       g1_par_verify_task.work(0);
2066     }
2067
2068     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2069   }
2070
2071   size_t start_used_bytes = g1h->used();
2072   g1h->set_marking_complete();
2073
2074   double count_end = os::elapsedTime();
2075   double this_final_counting_time = (count_end - start);
2076   _total_counting_time += this_final_counting_time;
2077
2078   if (G1PrintRegionLivenessInfo) {
2079     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2080     _g1h->heap_region_iterate(&cl);
2081   }
2082
2083   // Install the newly created mark bitmap as "prev".
2084   swapMarkBitMaps();
2085
2086   g1h->reset_gc_time_stamp();
2087
2088   // Note end of marking in all heap regions.
2089   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2090   if (G1CollectedHeap::use_parallel_gc_threads()) {
2091     g1h->set_par_threads((int)n_workers);
2092     g1h->workers()->run_task(&g1_par_note_end_task);
2093     g1h->set_par_threads(0);
2094
2095     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2096            "sanity check");
2097   } else {
2098     g1_par_note_end_task.work(0);
2099   }
2100   g1h->check_gc_time_stamps();
2101
2102   if (!cleanup_list_is_empty()) {
2103     // The cleanup list is not empty, so we'll have to process it
2104     // concurrently. Notify anyone else that might be wanting free
2105     // regions that there will be more free regions coming soon.
2106     g1h->set_free_regions_coming();
2107   }
2108
2109   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2110   // call below, since scrubbing affects the metric by which we sort the heap regions.
2111   if (G1ScrubRemSets) {
2112     double rs_scrub_start = os::elapsedTime();
2113     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2114     if (G1CollectedHeap::use_parallel_gc_threads()) {
2115       g1h->set_par_threads((int)n_workers);
2116       g1h->workers()->run_task(&g1_par_scrub_rs_task);
2117       g1h->set_par_threads(0);
2118
2119       assert(g1h->check_heap_region_claim_values(
2120                                             HeapRegion::ScrubRemSetClaimValue),
2121              "sanity check");
2122     } else {
2123       g1_par_scrub_rs_task.work(0);
2124     }
2125
2126     double rs_scrub_end = os::elapsedTime();
2127     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2128     _total_rs_scrub_time += this_rs_scrub_time;
2129   }
2130
2131   // This will also free any regions totally full of garbage objects,
2132   // and sort the regions.
2133   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2134
2135   // Statistics.
2136   double end = os::elapsedTime();
2137   _cleanup_times.add((end - start) * 1000.0);
2138
2139   if (G1Log::fine()) {
2140     g1h->print_size_transition(gclog_or_tty,
2141                                start_used_bytes,
2142                                g1h->used(),
2143                                g1h->capacity());
2144   }
2145
2146   // Clean up will have freed any regions completely full of garbage.
2147   // Update the soft reference policy with the new heap occupancy.
2148   Universe::update_heap_info_at_gc();
2149
2150   if (VerifyDuringGC) {
2151     HandleMark hm;  // handle scope
2152     Universe::heap()->prepare_for_verify();
2153     Universe::verify(VerifyOption_G1UsePrevMarking,
2154                      " VerifyDuringGC:(after)");
2155   }
2156
2157   g1h->check_bitmaps("Cleanup End");
2158
2159   g1h->verify_region_sets_optional();
2160
2161   // We need to make this be a "collection" so any collection pause that
2162   // races with it goes around and waits for completeCleanup to finish.
2163   g1h->increment_total_collections();
2164
2165   // Clean out dead classes and update Metaspace sizes.
2166   if (G1ClassUnloadingEnabled) {
2167     ClassLoaderDataGraph::purge();
2168   }
2169   MetaspaceGC::compute_new_size();
2170
2171   // We reclaimed old regions so we should calculate the sizes to make
2172   // sure we update the old gen/space data.
2173   g1h->g1mm()->update_sizes();
2174
2175   g1h->trace_heap_after_concurrent_cycle();
2176 }
2177
2178 void ConcurrentMark::completeCleanup() {
2179   if (has_aborted()) return;
2180
2181   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2182
2183   _cleanup_list.verify_optional();
2184   FreeRegionList tmp_free_list("Tmp Free List");
2185
2186   if (G1ConcRegionFreeingVerbose) {
2187     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2188                            "cleanup list has %u entries",
2189                            _cleanup_list.length());
2190   }
2191
2192   // No one else should be accessing the _cleanup_list at this point,
2193   // so it's not necessary to take any locks.
2194   while (!_cleanup_list.is_empty()) {
2195     HeapRegion* hr = _cleanup_list.remove_head();
2196     assert(hr != NULL, "Got NULL from a non-empty list");
2197     hr->par_clear();
2198     tmp_free_list.add_ordered(hr);
2199
2200     // Instead of adding one region at a time to the secondary_free_list,
2201     // we accumulate them in the local list and move them a few at a
2202     // time. This also cuts down on the number of notify_all() calls
2203     // we do during this process. We'll also append the local list when
2204     // _cleanup_list is empty (which means we just removed the last
2205     // region from the _cleanup_list).
2206     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2207         _cleanup_list.is_empty()) {
2208       if (G1ConcRegionFreeingVerbose) {
2209         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2210                                "appending %u entries to the secondary_free_list, "
2211                                "cleanup list still has %u entries",
2212                                tmp_free_list.length(),
2213                                _cleanup_list.length());
2214       }
2215
2216       {
2217         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2218         g1h->secondary_free_list_add(&tmp_free_list);
2219         SecondaryFreeList_lock->notify_all();
2220       }
2221
2222       if (G1StressConcRegionFreeing) {
2223         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2224           os::sleep(Thread::current(), (jlong) 1, false);
2225         }
2226       }
2227     }
2228   }
2229   assert(tmp_free_list.is_empty(), "post-condition");
2230 }
2231
2232 // Supporting Object and Oop closures for reference discovery
2233 // and processing during marking
2234
2235 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2236   HeapWord* addr = (HeapWord*)obj;
2237   return addr != NULL &&
2238          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2239 }
2240
2241 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2242 // Uses the CMTask associated with a worker thread (for serial reference
2243 // processing the CMTask for worker 0 is used) to preserve (mark) and
2244 // trace referent objects.
2245 //
2246 // Using the CMTask and embedded local queues avoids having the worker
2247 // threads operating on the global mark stack. This reduces the risk
2248 // of overflowing the stack - which we would rather avoid at this late
2249 // stage. Also, using the tasks' local queues removes the potential
2250 // for the workers to interfere with each other, which could occur if
2251 // operating on the global stack.
2252
2253 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2254   ConcurrentMark* _cm;
2255   CMTask*         _task;
2256   int             _ref_counter_limit;
2257   int             _ref_counter;
2258   bool            _is_serial;
2259  public:
2260   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2261     _cm(cm), _task(task), _is_serial(is_serial),
2262     _ref_counter_limit(G1RefProcDrainInterval) {
2263     assert(_ref_counter_limit > 0, "sanity");
2264     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2265     _ref_counter = _ref_counter_limit;
2266   }
2267
2268   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2269   virtual void do_oop(      oop* p) { do_oop_work(p); }
2270
2271   template <class T> void do_oop_work(T* p) {
2272     if (!_cm->has_overflown()) {
2273       oop obj = oopDesc::load_decode_heap_oop(p);
2274       if (_cm->verbose_high()) {
2275         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2276                                "*"PTR_FORMAT" = "PTR_FORMAT,
2277                                _task->worker_id(), p2i(p), p2i((void*) obj));
2278       }
2279
2280       _task->deal_with_reference(obj);
2281       _ref_counter--;
2282
2283       if (_ref_counter == 0) {
2284         // We have dealt with _ref_counter_limit references, pushing them
2285         // and objects reachable from them on to the local stack (and
2286         // possibly the global stack). Call CMTask::do_marking_step() to
2287         // process these entries.
2288         //
2289         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2290         // there's nothing more to do (i.e. we're done with the entries that
2291         // were pushed as a result of the CMTask::deal_with_reference() calls
2292         // above) or we overflow.
2293         //
2294         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2295         // flag while there may still be some work to do. (See the comment at
2296         // the beginning of CMTask::do_marking_step() for those conditions -
2297         // one of which is reaching the specified time target.) It is only
2298         // when CMTask::do_marking_step() returns without setting the
2299         // has_aborted() flag that the marking step has completed.
2300         do {
2301           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2302           _task->do_marking_step(mark_step_duration_ms,
2303                                  false      /* do_termination */,
2304                                  _is_serial);
2305         } while (_task->has_aborted() && !_cm->has_overflown());
2306         _ref_counter = _ref_counter_limit;
2307       }
2308     } else {
2309       if (_cm->verbose_high()) {
2310         gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2311       }
2312     }
2313   }
2314 };
2315
2316 // 'Drain' oop closure used by both serial and parallel reference processing.
2317 // Uses the CMTask associated with a given worker thread (for serial
2318 // reference processing the CMTask for worker 0 is used). Calls the
2319 // do_marking_step routine, with an unbelievably large timeout value,
2320 // to drain the marking data structures of the remaining entries
2321 // added by the 'keep alive' oop closure above.
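//
// For reference, the two ways these closures are instantiated (a sketch of
// the existing call sites in weakRefsWork() and G1CMRefProcTaskProxy::work()
// below, not new behavior):
//
//   // serial: bound to the CMTask of worker 0, run by the calling thread
//   G1CMDrainMarkingStackClosure drain(cm, cm->task(0), true /* is_serial */);
//   // parallel: one instance per gang worker
//   G1CMDrainMarkingStackClosure drain(cm, cm->task(worker_id), false /* is_serial */);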
2322
2323 class G1CMDrainMarkingStackClosure: public VoidClosure {
2324   ConcurrentMark* _cm;
2325   CMTask*         _task;
2326   bool            _is_serial;
2327  public:
2328   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2329     _cm(cm), _task(task), _is_serial(is_serial) {
2330     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2331   }
2332
2333   void do_void() {
2334     do {
2335       if (_cm->verbose_high()) {
2336         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2337                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2338       }
2339
2340       // We call CMTask::do_marking_step() to completely drain the local
2341       // and global marking stacks of entries pushed by the 'keep alive'
2342       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2343       //
2344       // CMTask::do_marking_step() is called in a loop, which we'll exit
2345       // if there's nothing more to do (i.e. we've completely drained the
2346       // entries that were pushed as a result of applying the 'keep alive'
2347       // closure to the entries on the discovered ref lists) or we overflow
2348       // the global marking stack.
2349       //
2350       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2351       // flag while there may still be some work to do. (See the comment at
2352       // the beginning of CMTask::do_marking_step() for those conditions -
2353       // one of which is reaching the specified time target.) It is only
2354       // when CMTask::do_marking_step() returns without setting the
2355       // has_aborted() flag that the marking step has completed.
2356
2357       _task->do_marking_step(1000000000.0 /* something very large */,
2358                              true         /* do_termination */,
2359                              _is_serial);
2360     } while (_task->has_aborted() && !_cm->has_overflown());
2361   }
2362 };
2363
2364 // Implementation of AbstractRefProcTaskExecutor for parallel
2365 // reference processing at the end of G1 concurrent marking
2366
2367 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2368  private:
2369   G1CollectedHeap* _g1h;
2370   ConcurrentMark*  _cm;
2371   WorkGang*        _workers;
2372   int              _active_workers;
2373
2374  public:
2375   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2376                           ConcurrentMark* cm,
2377                           WorkGang* workers,
2378                           int n_workers) :
2379     _g1h(g1h), _cm(cm),
2380     _workers(workers), _active_workers(n_workers) { }
2381
2382   // Executes the given task using concurrent marking worker threads.
2383 virtual void execute(ProcessTask& task); 2384 virtual void execute(EnqueueTask& task); 2385 }; 2386 2387 class G1CMRefProcTaskProxy: public AbstractGangTask { 2388 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2389 ProcessTask& _proc_task; 2390 G1CollectedHeap* _g1h; 2391 ConcurrentMark* _cm; 2392 2393 public: 2394 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2395 G1CollectedHeap* g1h, 2396 ConcurrentMark* cm) : 2397 AbstractGangTask("Process reference objects in parallel"), 2398 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2399 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2400 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2401 } 2402 2403 virtual void work(uint worker_id) { 2404 CMTask* task = _cm->task(worker_id); 2405 G1CMIsAliveClosure g1_is_alive(_g1h); 2406 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2407 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2408 2409 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2410 } 2411 }; 2412 2413 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2414 assert(_workers != NULL, "Need parallel worker threads."); 2415 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2416 2417 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2418 2419 // We need to reset the concurrency level before each 2420 // proxy task execution, so that the termination protocol 2421 // and overflow handling in CMTask::do_marking_step() knows 2422 // how many workers to wait for. 2423 _cm->set_concurrency(_active_workers); 2424 _g1h->set_par_threads(_active_workers); 2425 _workers->run_task(&proc_task_proxy); 2426 _g1h->set_par_threads(0); 2427 } 2428 2429 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2430 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2431 EnqueueTask& _enq_task; 2432 2433 public: 2434 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2435 AbstractGangTask("Enqueue reference objects in parallel"), 2436 _enq_task(enq_task) { } 2437 2438 virtual void work(uint worker_id) { 2439 _enq_task.work(worker_id); 2440 } 2441 }; 2442 2443 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2444 assert(_workers != NULL, "Need parallel worker threads."); 2445 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2446 2447 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2448 2449 // Not strictly necessary but... 2450 // 2451 // We need to reset the concurrency level before each 2452 // proxy task execution, so that the termination protocol 2453 // and overflow handling in CMTask::do_marking_step() knows 2454 // how many workers to wait for. 2455 _cm->set_concurrency(_active_workers); 2456 _g1h->set_par_threads(_active_workers); 2457 _workers->run_task(&enq_task_proxy); 2458 _g1h->set_par_threads(0); 2459 } 2460 2461 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2462 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2463 } 2464 2465 // Helper class to get rid of some boilerplate code. 
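// When the trace is enabled it also prepends a single space to the line
// (see doit_and_prepend() below), presumably so that the remark sub-phase
// timings line up with the " GC ref-proc" output in weakRefsWork().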
2466 class G1RemarkGCTraceTime : public GCTraceTime {
2467   static bool doit_and_prepend(bool doit) {
2468     if (doit) {
2469       gclog_or_tty->put(' ');
2470     }
2471     return doit;
2472   }
2473
2474  public:
2475   G1RemarkGCTraceTime(const char* title, bool doit)
2476     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
2477                   G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
2478   }
2479 };
2480
2481 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2482   if (has_overflown()) {
2483     // Skip processing the discovered references if we have
2484     // overflown the global marking stack. Reference objects
2485     // only get discovered once so it is OK to not
2486     // de-populate the discovered reference lists. We could have,
2487     // but the only benefit would be that, when marking restarts,
2488     // fewer reference objects are discovered.
2489     return;
2490   }
2491
2492   ResourceMark rm;
2493   HandleMark   hm;
2494
2495   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2496
2497   // Is alive closure.
2498   G1CMIsAliveClosure g1_is_alive(g1h);
2499
2500   // Inner scope to exclude the cleaning of the string and symbol
2501   // tables from the displayed time.
2502   {
2503     if (G1Log::finer()) {
2504       gclog_or_tty->put(' ');
2505     }
2506     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2507
2508     ReferenceProcessor* rp = g1h->ref_processor_cm();
2509
2510     // See the comment in G1CollectedHeap::ref_processing_init()
2511     // about how reference processing currently works in G1.
2512
2513     // Set the soft reference policy
2514     rp->setup_policy(clear_all_soft_refs);
2515     assert(_markStack.isEmpty(), "mark stack should be empty");
2516
2517     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2518     // in serial reference processing. Note these closures are also
2519     // used for serially processing (by the current thread) the
2520     // JNI references during parallel reference processing.
2521     //
2522     // These closures do not need to synchronize with the worker
2523     // threads involved in parallel reference processing as these
2524     // instances are executed serially by the current thread (i.e.
2525     // reference processing is not multi-threaded and is thus
2526     // performed by the current thread instead of a gang worker).
2527     //
2528     // The gang tasks involved in parallel reference processing create
2529     // their own instances of these closures, which do their own
2530     // synchronization among themselves.
2531     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2532     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2533
2534     // We need at least one active thread. If reference processing
2535     // is not multi-threaded we use the current (VMThread) thread,
2536     // otherwise we use the work gang from the G1CollectedHeap and
2537     // we utilize all the worker threads we can.
2538     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2539     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2540     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2541
2542     // Parallel processing task executor.
2543     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2544                                               g1h->workers(), active_workers);
2545     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2546
2547     // Set the concurrency level. The phase was already set prior to
2548     // executing the remark task.
2549 set_concurrency(active_workers); 2550 2551 // Set the degree of MT processing here. If the discovery was done MT, 2552 // the number of threads involved during discovery could differ from 2553 // the number of active workers. This is OK as long as the discovered 2554 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2555 rp->set_active_mt_degree(active_workers); 2556 2557 // Process the weak references. 2558 const ReferenceProcessorStats& stats = 2559 rp->process_discovered_references(&g1_is_alive, 2560 &g1_keep_alive, 2561 &g1_drain_mark_stack, 2562 executor, 2563 g1h->gc_timer_cm(), 2564 concurrent_gc_id()); 2565 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2566 2567 // The do_oop work routines of the keep_alive and drain_marking_stack 2568 // oop closures will set the has_overflown flag if we overflow the 2569 // global marking stack. 2570 2571 assert(_markStack.overflow() || _markStack.isEmpty(), 2572 "mark stack should be empty (unless it overflowed)"); 2573 2574 if (_markStack.overflow()) { 2575 // This should have been done already when we tried to push an 2576 // entry on to the global mark stack. But let's do it again. 2577 set_has_overflown(); 2578 } 2579 2580 assert(rp->num_q() == active_workers, "why not"); 2581 2582 rp->enqueue_discovered_references(executor); 2583 2584 rp->verify_no_references_recorded(); 2585 assert(!rp->discovery_enabled(), "Post condition"); 2586 } 2587 2588 if (has_overflown()) { 2589 // We can not trust g1_is_alive if the marking stack overflowed 2590 return; 2591 } 2592 2593 assert(_markStack.isEmpty(), "Marking should have completed"); 2594 2595 // Unload Klasses, String, Symbols, Code Cache, etc. 2596 { 2597 G1RemarkGCTraceTime trace("Unloading", G1Log::finer()); 2598 2599 if (G1ClassUnloadingEnabled) { 2600 bool purged_classes; 2601 2602 { 2603 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2604 purged_classes = SystemDictionary::do_unloading(&g1_is_alive); 2605 } 2606 2607 { 2608 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest()); 2609 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2610 } 2611 } 2612 2613 if (G1StringDedup::is_enabled()) { 2614 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2615 G1StringDedup::unlink(&g1_is_alive); 2616 } 2617 } 2618 } 2619 2620 void ConcurrentMark::swapMarkBitMaps() { 2621 CMBitMapRO* temp = _prevMarkBitMap; 2622 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2623 _nextMarkBitMap = (CMBitMap*) temp; 2624 } 2625 2626 class CMObjectClosure; 2627 2628 // Closure for iterating over objects, currently only used for 2629 // processing SATB buffers. 
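// Every object drained from a SATB buffer is handed to
// CMTask::deal_with_reference(). A sketch of the existing use (see
// G1RemarkThreadsClosure below), where task is the worker's CMTask:
//
//   CMObjectClosure obj_cl(task);
//   jt->satb_mark_queue().apply_closure_and_empty(&obj_cl);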
2630 class CMObjectClosure : public ObjectClosure {
2631  private:
2632   CMTask* _task;
2633
2634  public:
2635   void do_object(oop obj) {
2636     _task->deal_with_reference(obj);
2637   }
2638
2639   CMObjectClosure(CMTask* task) : _task(task) { }
2640 };
2641
2642 class G1RemarkThreadsClosure : public ThreadClosure {
2643   CMObjectClosure _cm_obj;
2644   G1CMOopClosure _cm_cl;
2645   MarkingCodeBlobClosure _code_cl;
2646   int _thread_parity;
2647   bool _is_par;
2648
2649  public:
2650   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
2651     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2652     _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
2653
2654   void do_thread(Thread* thread) {
2655     if (thread->is_Java_thread()) {
2656       if (thread->claim_oops_do(_is_par, _thread_parity)) {
2657         JavaThread* jt = (JavaThread*)thread;
2658
2659         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking,
2660         // however the oops reachable from nmethods have very complex lifecycles:
2661         // * Alive if on the stack of an executing method
2662         // * Weakly reachable otherwise
2663         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2664         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2665         jt->nmethods_do(&_code_cl);
2666
2667         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2668       }
2669     } else if (thread->is_VM_thread()) {
2670       if (thread->claim_oops_do(_is_par, _thread_parity)) {
2671         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2672       }
2673     }
2674   }
2675 };
2676
2677 class CMRemarkTask: public AbstractGangTask {
2678  private:
2679   ConcurrentMark* _cm;
2680   bool            _is_serial;
2681  public:
2682   void work(uint worker_id) {
2683     // Since all available tasks are actually started, we should
2684     // only proceed if we're supposed to be active.
2685     if (worker_id < _cm->active_tasks()) {
2686       CMTask* task = _cm->task(worker_id);
2687       task->record_start_time();
2688       {
2689         ResourceMark rm;
2690         HandleMark hm;
2691
2692         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
2693         Threads::threads_do(&threads_f);
2694       }
2695
2696       do {
2697         task->do_marking_step(1000000000.0 /* something very large */,
2698                               true         /* do_termination */,
2699                               _is_serial);
2700       } while (task->has_aborted() && !_cm->has_overflown());
2701       // If we overflow, then we do not want to restart. We instead
2702       // want to abort remark and do concurrent marking again.
2703       task->record_end_time();
2704     }
2705   }
2706
2707   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2708     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2709     _cm->terminator()->reset_for_reuse(active_workers);
2710   }
2711 };
2712
2713 void ConcurrentMark::checkpointRootsFinalWork() {
2714   ResourceMark rm;
2715   HandleMark   hm;
2716   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2717
2718   G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
2719
2720   g1h->ensure_parsability(false);
2721
2722   if (G1CollectedHeap::use_parallel_gc_threads()) {
2723     G1CollectedHeap::StrongRootsScope srs(g1h);
2724     // This is remark, so we'll use up all active threads.
2725     uint active_workers = g1h->workers()->active_workers();
2726     if (active_workers == 0) {
2727       assert(active_workers > 0, "Should have been set earlier");
2728       active_workers = (uint) ParallelGCThreads;
2729       g1h->workers()->set_active_workers(active_workers);
2730     }
2731     set_concurrency_and_phase(active_workers, false /* concurrent */);
2732     // Leave _parallel_marking_threads at its
2733     // value originally calculated in the ConcurrentMark
2734     // constructor and pass values of the active workers
2735     // through the gang in the task.
2736
2737     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2738     // We will start all available threads, even if we decide that the
2739     // active_workers will be fewer. The extra ones will just bail out
2740     // immediately.
2741     g1h->set_par_threads(active_workers);
2742     g1h->workers()->run_task(&remarkTask);
2743     g1h->set_par_threads(0);
2744   } else {
2745     G1CollectedHeap::StrongRootsScope srs(g1h);
2746     uint active_workers = 1;
2747     set_concurrency_and_phase(active_workers, false /* concurrent */);
2748
2749     // Note - if there's no work gang then the VMThread will be
2750     // the thread to execute the remark - serially. We have
2751     // to pass true for the is_serial parameter so that
2752     // CMTask::do_marking_step() doesn't enter the sync
2753     // barriers in the event of an overflow. Doing so would
2754     // cause an assert that the current thread is not a
2755     // concurrent GC thread.
2756 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2757 remarkTask.work(0); 2758 } 2759 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2760 guarantee(has_overflown() || 2761 satb_mq_set.completed_buffers_num() == 0, 2762 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2763 BOOL_TO_STR(has_overflown()), 2764 satb_mq_set.completed_buffers_num())); 2765 2766 print_stats(); 2767 } 2768 2769 #ifndef PRODUCT 2770 2771 class PrintReachableOopClosure: public OopClosure { 2772 private: 2773 G1CollectedHeap* _g1h; 2774 outputStream* _out; 2775 VerifyOption _vo; 2776 bool _all; 2777 2778 public: 2779 PrintReachableOopClosure(outputStream* out, 2780 VerifyOption vo, 2781 bool all) : 2782 _g1h(G1CollectedHeap::heap()), 2783 _out(out), _vo(vo), _all(all) { } 2784 2785 void do_oop(narrowOop* p) { do_oop_work(p); } 2786 void do_oop( oop* p) { do_oop_work(p); } 2787 2788 template <class T> void do_oop_work(T* p) { 2789 oop obj = oopDesc::load_decode_heap_oop(p); 2790 const char* str = NULL; 2791 const char* str2 = ""; 2792 2793 if (obj == NULL) { 2794 str = ""; 2795 } else if (!_g1h->is_in_g1_reserved(obj)) { 2796 str = " O"; 2797 } else { 2798 HeapRegion* hr = _g1h->heap_region_containing(obj); 2799 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2800 bool marked = _g1h->is_marked(obj, _vo); 2801 2802 if (over_tams) { 2803 str = " >"; 2804 if (marked) { 2805 str2 = " AND MARKED"; 2806 } 2807 } else if (marked) { 2808 str = " M"; 2809 } else { 2810 str = " NOT"; 2811 } 2812 } 2813 2814 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2815 p2i(p), p2i((void*) obj), str, str2); 2816 } 2817 }; 2818 2819 class PrintReachableObjectClosure : public ObjectClosure { 2820 private: 2821 G1CollectedHeap* _g1h; 2822 outputStream* _out; 2823 VerifyOption _vo; 2824 bool _all; 2825 HeapRegion* _hr; 2826 2827 public: 2828 PrintReachableObjectClosure(outputStream* out, 2829 VerifyOption vo, 2830 bool all, 2831 HeapRegion* hr) : 2832 _g1h(G1CollectedHeap::heap()), 2833 _out(out), _vo(vo), _all(all), _hr(hr) { } 2834 2835 void do_object(oop o) { 2836 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2837 bool marked = _g1h->is_marked(o, _vo); 2838 bool print_it = _all || over_tams || marked; 2839 2840 if (print_it) { 2841 _out->print_cr(" "PTR_FORMAT"%s", 2842 p2i((void *)o), (over_tams) ? " >" : (marked) ? 
" M" : ""); 2843 PrintReachableOopClosure oopCl(_out, _vo, _all); 2844 o->oop_iterate_no_header(&oopCl); 2845 } 2846 } 2847 }; 2848 2849 class PrintReachableRegionClosure : public HeapRegionClosure { 2850 private: 2851 G1CollectedHeap* _g1h; 2852 outputStream* _out; 2853 VerifyOption _vo; 2854 bool _all; 2855 2856 public: 2857 bool doHeapRegion(HeapRegion* hr) { 2858 HeapWord* b = hr->bottom(); 2859 HeapWord* e = hr->end(); 2860 HeapWord* t = hr->top(); 2861 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2862 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2863 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p)); 2864 _out->cr(); 2865 2866 HeapWord* from = b; 2867 HeapWord* to = t; 2868 2869 if (to > from) { 2870 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to)); 2871 _out->cr(); 2872 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2873 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2874 _out->cr(); 2875 } 2876 2877 return false; 2878 } 2879 2880 PrintReachableRegionClosure(outputStream* out, 2881 VerifyOption vo, 2882 bool all) : 2883 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2884 }; 2885 2886 void ConcurrentMark::print_reachable(const char* str, 2887 VerifyOption vo, 2888 bool all) { 2889 gclog_or_tty->cr(); 2890 gclog_or_tty->print_cr("== Doing heap dump... "); 2891 2892 if (G1PrintReachableBaseFile == NULL) { 2893 gclog_or_tty->print_cr(" #### error: no base file defined"); 2894 return; 2895 } 2896 2897 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2898 (JVM_MAXPATHLEN - 1)) { 2899 gclog_or_tty->print_cr(" #### error: file name too long"); 2900 return; 2901 } 2902 2903 char file_name[JVM_MAXPATHLEN]; 2904 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2905 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2906 2907 fileStream fout(file_name); 2908 if (!fout.is_open()) { 2909 gclog_or_tty->print_cr(" #### error: could not open file"); 2910 return; 2911 } 2912 2913 outputStream* out = &fout; 2914 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2915 out->cr(); 2916 2917 out->print_cr("--- ITERATING OVER REGIONS"); 2918 out->cr(); 2919 PrintReachableRegionClosure rcl(out, vo, all); 2920 _g1h->heap_region_iterate(&rcl); 2921 out->cr(); 2922 2923 gclog_or_tty->print_cr(" done"); 2924 gclog_or_tty->flush(); 2925 } 2926 2927 #endif // PRODUCT 2928 2929 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2930 // Note we are overriding the read-only view of the prev map here, via 2931 // the cast. 2932 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2933 } 2934 2935 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2936 _nextMarkBitMap->clearRange(mr); 2937 } 2938 2939 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2940 clearRangePrevBitmap(mr); 2941 clearRangeNextBitmap(mr); 2942 } 2943 2944 HeapRegion* 2945 ConcurrentMark::claim_region(uint worker_id) { 2946 // "checkpoint" the finger 2947 HeapWord* finger = _finger; 2948 2949 // _heap_end will not change underneath our feet; it only changes at 2950 // yield points. 2951 while (finger < _heap_end) { 2952 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2953 2954 // Note on how this code handles humongous regions. In the 2955 // normal case the finger will reach the start of a "starts 2956 // humongous" (SH) region. 
Its end will either be the end of the
2957     // last "continues humongous" (CH) region in the sequence, or the
2958     // standard end of the SH region (if the SH is the only region in
2959     // the sequence). That way claim_region() will skip over the CH
2960     // regions. However, there is a subtle race between a CM thread
2961     // executing this method and a mutator thread doing a humongous
2962     // object allocation. The two are not mutually exclusive as the CM
2963     // thread does not need to hold the Heap_lock when it gets
2964     // here. So there is a chance that claim_region() will come across
2965     // a free region that's in the process of becoming a SH or a CH
2966     // region. In the former case, it will either
2967     //   a) Miss the update to the region's end, in which case it will
2968     //      visit every subsequent CH region, will find their bitmaps
2969     //      empty, and do nothing, or
2970     //   b) Observe the update of the region's end (in which case it
2971     //      will skip the subsequent CH regions).
2972     // If it comes across a region that suddenly becomes CH, the
2973     // scenario will be similar to b). So, the race between
2974     // claim_region() and a humongous object allocation might force us
2975     // to do a bit of unnecessary work (due to some unnecessary bitmap
2976     // iterations) but it should not introduce any correctness issues.
2977     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2978     HeapWord*   bottom        = curr_region->bottom();
2979     HeapWord*   end           = curr_region->end();
2980     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2981
2982     if (verbose_low()) {
2983       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2984                              "["PTR_FORMAT", "PTR_FORMAT"), "
2985                              "limit = "PTR_FORMAT,
2986                              worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2987     }
2988
2989     // Is the gap between reading the finger and doing the CAS too long?
2990     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2991     if (res == finger) {
2992       // we succeeded
2993
2994       // notice that _finger == end cannot be guaranteed here since
2995       // someone else might have moved the finger even further
2996       assert(_finger >= end, "the finger should have moved forward");
2997
2998       if (verbose_low()) {
2999         gclog_or_tty->print_cr("[%u] we were successful with region = "
3000                                PTR_FORMAT, worker_id, p2i(curr_region));
3001       }
3002
3003       if (limit > bottom) {
3004         if (verbose_low()) {
3005           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
3006                                  "returning it ", worker_id, p2i(curr_region));
3007         }
3008         return curr_region;
3009       } else {
3010         assert(limit == bottom,
3011                "the region limit should be at bottom");
3012         if (verbose_low()) {
3013           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
3014                                  "returning NULL", worker_id, p2i(curr_region));
3015         }
3016         // we return NULL and the caller should try calling
3017         // claim_region() again.
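        // A caller typically retries in a loop; a hypothetical sketch
        // (the actual call site lives in CMTask::do_marking_step()):
        //
        //   HeapRegion* hr = _cm->claim_region(worker_id);
        //   while (hr == NULL && !all_regions_claimed) {  // all_regions_claimed
        //     hr = _cm->claim_region(worker_id);          // is illustrative only
        //   }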
3018 return NULL; 3019 } 3020 } else { 3021 assert(_finger > finger, "the finger should have moved forward"); 3022 if (verbose_low()) { 3023 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 3024 "global finger = "PTR_FORMAT", " 3025 "our finger = "PTR_FORMAT, 3026 worker_id, p2i(_finger), p2i(finger)); 3027 } 3028 3029 // read it again 3030 finger = _finger; 3031 } 3032 } 3033 3034 return NULL; 3035 } 3036 3037 #ifndef PRODUCT 3038 enum VerifyNoCSetOopsPhase { 3039 VerifyNoCSetOopsStack, 3040 VerifyNoCSetOopsQueues, 3041 VerifyNoCSetOopsSATBCompleted, 3042 VerifyNoCSetOopsSATBThread 3043 }; 3044 3045 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 3046 private: 3047 G1CollectedHeap* _g1h; 3048 VerifyNoCSetOopsPhase _phase; 3049 int _info; 3050 3051 const char* phase_str() { 3052 switch (_phase) { 3053 case VerifyNoCSetOopsStack: return "Stack"; 3054 case VerifyNoCSetOopsQueues: return "Queue"; 3055 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 3056 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 3057 default: ShouldNotReachHere(); 3058 } 3059 return NULL; 3060 } 3061 3062 void do_object_work(oop obj) { 3063 guarantee(!_g1h->obj_in_cs(obj), 3064 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 3065 p2i((void*) obj), phase_str(), _info)); 3066 } 3067 3068 public: 3069 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 3070 3071 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 3072 _phase = phase; 3073 _info = info; 3074 } 3075 3076 virtual void do_oop(oop* p) { 3077 oop obj = oopDesc::load_decode_heap_oop(p); 3078 do_object_work(obj); 3079 } 3080 3081 virtual void do_oop(narrowOop* p) { 3082 // We should not come across narrow oops while scanning marking 3083 // stacks and SATB buffers. 3084 ShouldNotReachHere(); 3085 } 3086 3087 virtual void do_object(oop obj) { 3088 do_object_work(obj); 3089 } 3090 }; 3091 3092 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 3093 bool verify_enqueued_buffers, 3094 bool verify_thread_buffers, 3095 bool verify_fingers) { 3096 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 3097 if (!G1CollectedHeap::heap()->mark_in_progress()) { 3098 return; 3099 } 3100 3101 VerifyNoCSetOopsClosure cl; 3102 3103 if (verify_stacks) { 3104 // Verify entries on the global mark stack 3105 cl.set_phase(VerifyNoCSetOopsStack); 3106 _markStack.oops_do(&cl); 3107 3108 // Verify entries on the task queues 3109 for (uint i = 0; i < _max_worker_id; i += 1) { 3110 cl.set_phase(VerifyNoCSetOopsQueues, i); 3111 CMTaskQueue* queue = _task_queues->queue(i); 3112 queue->oops_do(&cl); 3113 } 3114 } 3115 3116 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 3117 3118 // Verify entries on the enqueued SATB buffers 3119 if (verify_enqueued_buffers) { 3120 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 3121 satb_qs.iterate_completed_buffers_read_only(&cl); 3122 } 3123 3124 // Verify entries on the per-thread SATB buffers 3125 if (verify_thread_buffers) { 3126 cl.set_phase(VerifyNoCSetOopsSATBThread); 3127 satb_qs.iterate_thread_buffers_read_only(&cl); 3128 } 3129 3130 if (verify_fingers) { 3131 // Verify the global finger 3132 HeapWord* global_finger = finger(); 3133 if (global_finger != NULL && global_finger < _heap_end) { 3134 // The global finger always points to a heap region boundary. 
We 3135 // use heap_region_containing_raw() to get the containing region 3136 // given that the global finger could be pointing to a free region 3137 // which subsequently becomes continues humongous. If that 3138 // happens, heap_region_containing() will return the bottom of the 3139 // corresponding starts humongous region and the check below will 3140 // not hold any more. 3141 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3142 guarantee(global_finger == global_hr->bottom(), 3143 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3144 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 3145 } 3146 3147 // Verify the task fingers 3148 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3149 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3150 CMTask* task = _tasks[i]; 3151 HeapWord* task_finger = task->finger(); 3152 if (task_finger != NULL && task_finger < _heap_end) { 3153 // See above note on the global finger verification. 3154 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3155 guarantee(task_finger == task_hr->bottom() || 3156 !task_hr->in_collection_set(), 3157 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3158 p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); 3159 } 3160 } 3161 } 3162 } 3163 #endif // PRODUCT 3164 3165 // Aggregate the counting data that was constructed concurrently 3166 // with marking. 3167 class AggregateCountDataHRClosure: public HeapRegionClosure { 3168 G1CollectedHeap* _g1h; 3169 ConcurrentMark* _cm; 3170 CardTableModRefBS* _ct_bs; 3171 BitMap* _cm_card_bm; 3172 uint _max_worker_id; 3173 3174 public: 3175 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3176 BitMap* cm_card_bm, 3177 uint max_worker_id) : 3178 _g1h(g1h), _cm(g1h->concurrent_mark()), 3179 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3180 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3181 3182 bool doHeapRegion(HeapRegion* hr) { 3183 if (hr->continuesHumongous()) { 3184 // We will ignore these here and process them when their 3185 // associated "starts humongous" region is processed. 3186 // Note that we cannot rely on their associated 3187 // "starts humongous" region to have their bit set to 1 3188 // since, due to the region chunking in the parallel region 3189 // iteration, a "continues humongous" region might be visited 3190 // before its associated "starts humongous". 3191 return false; 3192 } 3193 3194 HeapWord* start = hr->bottom(); 3195 HeapWord* limit = hr->next_top_at_mark_start(); 3196 HeapWord* end = hr->end(); 3197 3198 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3199 err_msg("Preconditions not met - " 3200 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3201 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3202 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()))); 3203 3204 assert(hr->next_marked_bytes() == 0, "Precondition"); 3205 3206 if (start == limit) { 3207 // NTAMS of this region has not been set so nothing to do. 3208 return false; 3209 } 3210 3211 // 'start' should be in the heap. 
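    // ('start' is hr->bottom(), and G1 region boundaries are card aligned,
    // which is what makes the stronger is_card_aligned() assertion below
    // safe.)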
3212   assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3213   // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3214   assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3215 
3216   BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3217   BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3218   BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3219 
3220   // If ntams is not card aligned then we bump the card bitmap index
3221   // for limit so that we get all the cards spanned by
3222   // the object ending at ntams.
3223   // Note: if this is the last region in the heap then ntams
3224   // could be actually just beyond the end of the heap;
3225   // limit_idx will then correspond to a (non-existent) card
3226   // that is also outside the heap.
3227   if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3228     limit_idx += 1;
3229   }
3230 
3231   assert(limit_idx <= end_idx, "or else use atomics");
3232 
3233   // Aggregate the "stripe" in the count data associated with hr.
3234   uint hrs_index = hr->hrs_index();
3235   size_t marked_bytes = 0;
3236 
3237   for (uint i = 0; i < _max_worker_id; i += 1) {
3238     size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3239     BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3240 
3241     // Fetch the marked_bytes in this region for task i and
3242     // add it to the running total for this region.
3243     marked_bytes += marked_bytes_array[hrs_index];
3244 
3245     // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3246     // into the global card bitmap.
3247     BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3248 
3249     while (scan_idx < limit_idx) {
3250       assert(task_card_bm->at(scan_idx) == true, "should be");
3251       _cm_card_bm->set_bit(scan_idx);
3252       assert(_cm_card_bm->at(scan_idx) == true, "should be");
3253 
3254       // BitMap::get_next_one_offset() can handle the case when
3255       // its left_offset parameter is greater than its right_offset
3256       // parameter. It does, however, have an early exit if
3257       // left_offset == right_offset. So let's limit the value
3258       // passed in for left offset here.
3259       BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3260       scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3261     }
3262   }
3263 
3264   // Update the marked bytes for this region.
3265 hr->add_to_marked_bytes(marked_bytes); 3266 3267 // Next heap region 3268 return false; 3269 } 3270 }; 3271 3272 class G1AggregateCountDataTask: public AbstractGangTask { 3273 protected: 3274 G1CollectedHeap* _g1h; 3275 ConcurrentMark* _cm; 3276 BitMap* _cm_card_bm; 3277 uint _max_worker_id; 3278 int _active_workers; 3279 3280 public: 3281 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3282 ConcurrentMark* cm, 3283 BitMap* cm_card_bm, 3284 uint max_worker_id, 3285 int n_workers) : 3286 AbstractGangTask("Count Aggregation"), 3287 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3288 _max_worker_id(max_worker_id), 3289 _active_workers(n_workers) { } 3290 3291 void work(uint worker_id) { 3292 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3293 3294 if (G1CollectedHeap::use_parallel_gc_threads()) { 3295 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3296 _active_workers, 3297 HeapRegion::AggregateCountClaimValue); 3298 } else { 3299 _g1h->heap_region_iterate(&cl); 3300 } 3301 } 3302 }; 3303 3304 3305 void ConcurrentMark::aggregate_count_data() { 3306 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3307 _g1h->workers()->active_workers() : 3308 1); 3309 3310 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3311 _max_worker_id, n_workers); 3312 3313 if (G1CollectedHeap::use_parallel_gc_threads()) { 3314 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3315 "sanity check"); 3316 _g1h->set_par_threads(n_workers); 3317 _g1h->workers()->run_task(&g1_par_agg_task); 3318 _g1h->set_par_threads(0); 3319 3320 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3321 "sanity check"); 3322 _g1h->reset_heap_region_claim_values(); 3323 } else { 3324 g1_par_agg_task.work(0); 3325 } 3326 } 3327 3328 // Clear the per-worker arrays used to store the per-region counting data 3329 void ConcurrentMark::clear_all_count_data() { 3330 // Clear the global card bitmap - it will be filled during 3331 // liveness count aggregation (during remark) and the 3332 // final counting task. 3333 _card_bm.clear(); 3334 3335 // Clear the global region bitmap - it will be filled as part 3336 // of the final counting task. 3337 _region_bm.clear(); 3338 3339 uint max_regions = _g1h->max_regions(); 3340 assert(_max_worker_id > 0, "uninitialized"); 3341 3342 for (uint i = 0; i < _max_worker_id; i += 1) { 3343 BitMap* task_card_bm = count_card_bitmap_for(i); 3344 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3345 3346 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3347 assert(marked_bytes_array != NULL, "uninitialized"); 3348 3349 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3350 task_card_bm->clear(); 3351 } 3352 } 3353 3354 void ConcurrentMark::print_stats() { 3355 if (verbose_stats()) { 3356 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3357 for (size_t i = 0; i < _active_tasks; ++i) { 3358 _tasks[i]->print_stats(); 3359 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3360 } 3361 } 3362 } 3363 3364 // abandon current marking iteration due to a Full GC 3365 void ConcurrentMark::abort() { 3366 // Clear all marks to force marking thread to do nothing 3367 _nextMarkBitMap->clearAll(); 3368 3369 // Note we cannot clear the previous marking bitmap here 3370 // since VerifyDuringGC verifies the objects marked during 3371 // a full GC against the previous bitmap. 
3372 
3373   // Clear the liveness counting data
3374   clear_all_count_data();
3375   // Empty mark stack
3376   reset_marking_state();
3377   for (uint i = 0; i < _max_worker_id; ++i) {
3378     _tasks[i]->clear_region_fields();
3379   }
3380   _first_overflow_barrier_sync.abort();
3381   _second_overflow_barrier_sync.abort();
3382   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3383   if (!gc_id.is_undefined()) {
3384     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3385     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3386     _aborted_gc_id = gc_id;
3387   }
3388   _has_aborted = true;
3389 
3390   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3391   satb_mq_set.abandon_partial_marking();
3392   // This can be called either during or outside marking; we'll read
3393   // the expected_active value from the SATB queue set.
3394   satb_mq_set.set_active_all_threads(
3395                                  false, /* new active value */
3396                                  satb_mq_set.is_active() /* expected_active */);
3397 
3398   _g1h->trace_heap_after_concurrent_cycle();
3399   _g1h->register_concurrent_cycle_end();
3400 }
3401 
3402 const GCId& ConcurrentMark::concurrent_gc_id() {
3403   if (has_aborted()) {
3404     return _aborted_gc_id;
3405   }
3406   return _g1h->gc_tracer_cm()->gc_id();
3407 }
3408 
3409 static void print_ms_time_info(const char* prefix, const char* name,
3410                                NumberSeq& ns) {
3411   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3412                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3413   if (ns.num() > 0) {
3414     gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3415                            prefix, ns.sd(), ns.maximum());
3416   }
3417 }
3418 
3419 void ConcurrentMark::print_summary_info() {
3420   gclog_or_tty->print_cr(" Concurrent marking:");
3421   print_ms_time_info("  ", "init marks", _init_times);
3422   print_ms_time_info("  ", "remarks", _remark_times);
3423   {
3424     print_ms_time_info("     ", "final marks", _remark_mark_times);
3425     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3426 
3427   }
3428   print_ms_time_info("  ", "cleanups", _cleanup_times);
3429   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3430                          _total_counting_time,
3431                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3432                           (double)_cleanup_times.num()
3433                          : 0.0));
3434   if (G1ScrubRemSets) {
3435     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3436                            _total_rs_scrub_time,
3437                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3438                             (double)_cleanup_times.num()
3439                            : 0.0));
3440   }
3441   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3442                          (_init_times.sum() + _remark_times.sum() +
3443                           _cleanup_times.sum())/1000.0);
3444   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3445                          "(%8.2f s marking).",
3446                          cmThread()->vtime_accum(),
3447                          cmThread()->vtime_mark_accum());
3448 }
3449 
3450 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3451   if (use_parallel_marking_threads()) {
3452     _parallel_workers->print_worker_threads_on(st);
3453   }
3454 }
3455 
3456 void ConcurrentMark::print_on_error(outputStream* st) const {
3457   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3458                p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3459   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3460   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3461 }
3462 
3463 // We take a break if someone is trying to stop the world.
3464 bool ConcurrentMark::do_yield_check(uint worker_id) { 3465 if (SuspendibleThreadSet::should_yield()) { 3466 if (worker_id == 0) { 3467 _g1h->g1_policy()->record_concurrent_pause(); 3468 } 3469 SuspendibleThreadSet::yield(); 3470 return true; 3471 } else { 3472 return false; 3473 } 3474 } 3475 3476 bool ConcurrentMark::containing_card_is_marked(void* p) { 3477 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3478 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3479 } 3480 3481 bool ConcurrentMark::containing_cards_are_marked(void* start, 3482 void* last) { 3483 return containing_card_is_marked(start) && 3484 containing_card_is_marked(last); 3485 } 3486 3487 #ifndef PRODUCT 3488 // for debugging purposes 3489 void ConcurrentMark::print_finger() { 3490 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3491 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3492 for (uint i = 0; i < _max_worker_id; ++i) { 3493 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3494 } 3495 gclog_or_tty->cr(); 3496 } 3497 #endif 3498 3499 void CMTask::scan_object(oop obj) { 3500 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3501 3502 if (_cm->verbose_high()) { 3503 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3504 _worker_id, p2i((void*) obj)); 3505 } 3506 3507 size_t obj_size = obj->size(); 3508 _words_scanned += obj_size; 3509 3510 obj->oop_iterate(_cm_oop_closure); 3511 statsOnly( ++_objs_scanned ); 3512 check_limits(); 3513 } 3514 3515 // Closure for iteration over bitmaps 3516 class CMBitMapClosure : public BitMapClosure { 3517 private: 3518 // the bitmap that is being iterated over 3519 CMBitMap* _nextMarkBitMap; 3520 ConcurrentMark* _cm; 3521 CMTask* _task; 3522 3523 public: 3524 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3525 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3526 3527 bool do_bit(size_t offset) { 3528 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3529 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3530 assert( addr < _cm->finger(), "invariant"); 3531 3532 statsOnly( _task->increase_objs_found_on_bitmap() ); 3533 assert(addr >= _task->finger(), "invariant"); 3534 3535 // We move that task's local finger along. 
3536     _task->move_finger_to(addr);
3537 
3538     _task->scan_object(oop(addr));
3539     // we only partially drain the local queue and global stack
3540     _task->drain_local_queue(true);
3541     _task->drain_global_stack(true);
3542 
3543     // if the has_aborted flag has been raised, we need to bail out of
3544     // the iteration
3545     return !_task->has_aborted();
3546   }
3547 };
3548 
3549 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3550                                ConcurrentMark* cm,
3551                                CMTask* task)
3552   : _g1h(g1h), _cm(cm), _task(task) {
3553   assert(_ref_processor == NULL, "should be initialized to NULL");
3554 
3555   if (G1UseConcMarkReferenceProcessing) {
3556     _ref_processor = g1h->ref_processor_cm();
3557     assert(_ref_processor != NULL, "should not be NULL");
3558   }
3559 }
3560 
3561 void CMTask::setup_for_region(HeapRegion* hr) {
3562   assert(hr != NULL,
3563          "claim_region() should have filtered out NULL regions");
3564   assert(!hr->continuesHumongous(),
3565          "claim_region() should have filtered out continues humongous regions");
3566 
3567   if (_cm->verbose_low()) {
3568     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3569                            _worker_id, p2i(hr));
3570   }
3571 
3572   _curr_region = hr;
3573   _finger = hr->bottom();
3574   update_region_limit();
3575 }
3576 
3577 void CMTask::update_region_limit() {
3578   HeapRegion* hr = _curr_region;
3579   HeapWord* bottom = hr->bottom();
3580   HeapWord* limit = hr->next_top_at_mark_start();
3581 
3582   if (limit == bottom) {
3583     if (_cm->verbose_low()) {
3584       gclog_or_tty->print_cr("[%u] found an empty region "
3585                              "["PTR_FORMAT", "PTR_FORMAT")",
3586                              _worker_id, p2i(bottom), p2i(limit));
3587     }
3588     // The region was collected underneath our feet.
3589     // We set the finger to bottom to ensure that the bitmap
3590     // iteration that will follow this will not do anything.
3591     // (this is not a condition that holds when we set the region up,
3592     // as the region is not supposed to be empty in the first place)
3593     _finger = bottom;
3594   } else if (limit >= _region_limit) {
3595     assert(limit >= _finger, "peace of mind");
3596   } else {
3597     assert(limit < _region_limit, "only way to get here");
3598     // This can happen under some pretty unusual circumstances. An
3599     // evacuation pause empties the region underneath our feet (NTAMS
3600     // at bottom). We then do some allocation in the region (NTAMS
3601     // stays at bottom), followed by the region being used as a GC
3602     // alloc region (NTAMS will move to top() and the objects
3603     // originally below it will be grayed). All objects now marked in
3604     // the region are explicitly grayed, if below the global finger,
3605     // and in fact we do not need to scan anything else. So, we simply
3606     // set _finger to be limit to ensure that the bitmap iteration
3607     // doesn't do anything.
3608     _finger = limit;
3609   }
3610 
3611   _region_limit = limit;
3612 }
3613 
3614 void CMTask::giveup_current_region() {
3615   assert(_curr_region != NULL, "invariant");
3616   if (_cm->verbose_low()) {
3617     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3618                            _worker_id, p2i(_curr_region));
3619   }
3620   clear_region_fields();
3621 }
3622 
3623 void CMTask::clear_region_fields() {
3624   // Reset these three fields to values that indicate that we're not
3625   // holding on to a region.
3626 _curr_region = NULL; 3627 _finger = NULL; 3628 _region_limit = NULL; 3629 } 3630 3631 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3632 if (cm_oop_closure == NULL) { 3633 assert(_cm_oop_closure != NULL, "invariant"); 3634 } else { 3635 assert(_cm_oop_closure == NULL, "invariant"); 3636 } 3637 _cm_oop_closure = cm_oop_closure; 3638 } 3639 3640 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3641 guarantee(nextMarkBitMap != NULL, "invariant"); 3642 3643 if (_cm->verbose_low()) { 3644 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3645 } 3646 3647 _nextMarkBitMap = nextMarkBitMap; 3648 clear_region_fields(); 3649 3650 _calls = 0; 3651 _elapsed_time_ms = 0.0; 3652 _termination_time_ms = 0.0; 3653 _termination_start_time_ms = 0.0; 3654 3655 #if _MARKING_STATS_ 3656 _local_pushes = 0; 3657 _local_pops = 0; 3658 _local_max_size = 0; 3659 _objs_scanned = 0; 3660 _global_pushes = 0; 3661 _global_pops = 0; 3662 _global_max_size = 0; 3663 _global_transfers_to = 0; 3664 _global_transfers_from = 0; 3665 _regions_claimed = 0; 3666 _objs_found_on_bitmap = 0; 3667 _satb_buffers_processed = 0; 3668 _steal_attempts = 0; 3669 _steals = 0; 3670 _aborted = 0; 3671 _aborted_overflow = 0; 3672 _aborted_cm_aborted = 0; 3673 _aborted_yield = 0; 3674 _aborted_timed_out = 0; 3675 _aborted_satb = 0; 3676 _aborted_termination = 0; 3677 #endif // _MARKING_STATS_ 3678 } 3679 3680 bool CMTask::should_exit_termination() { 3681 regular_clock_call(); 3682 // This is called when we are in the termination protocol. We should 3683 // quit if, for some reason, this task wants to abort or the global 3684 // stack is not empty (this means that we can get work from it). 3685 return !_cm->mark_stack_empty() || has_aborted(); 3686 } 3687 3688 void CMTask::reached_limit() { 3689 assert(_words_scanned >= _words_scanned_limit || 3690 _refs_reached >= _refs_reached_limit , 3691 "shouldn't have been called otherwise"); 3692 regular_clock_call(); 3693 } 3694 3695 void CMTask::regular_clock_call() { 3696 if (has_aborted()) return; 3697 3698 // First, we need to recalculate the words scanned and refs reached 3699 // limits for the next clock call. 3700 recalculate_limits(); 3701 3702 // During the regular clock call we do the following 3703 3704 // (1) If an overflow has been flagged, then we abort. 3705 if (_cm->has_overflown()) { 3706 set_has_aborted(); 3707 return; 3708 } 3709 3710 // If we are not concurrent (i.e. we're doing remark) we don't need 3711 // to check anything else. The other steps are only needed during 3712 // the concurrent marking phase. 3713 if (!concurrent()) return; 3714 3715 // (2) If marking has been aborted for Full GC, then we also abort. 3716 if (_cm->has_aborted()) { 3717 set_has_aborted(); 3718 statsOnly( ++_aborted_cm_aborted ); 3719 return; 3720 } 3721 3722 double curr_time_ms = os::elapsedVTime() * 1000.0; 3723 3724 // (3) If marking stats are enabled, then we update the step history. 
3725 #if _MARKING_STATS_
3726   if (_words_scanned >= _words_scanned_limit) {
3727     ++_clock_due_to_scanning;
3728   }
3729   if (_refs_reached >= _refs_reached_limit) {
3730     ++_clock_due_to_marking;
3731   }
3732 
3733   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3734   _interval_start_time_ms = curr_time_ms;
3735   _all_clock_intervals_ms.add(last_interval_ms);
3736 
3737   if (_cm->verbose_medium()) {
3738     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3739                            "scanned = %d%s, refs reached = %d%s",
3740                            _worker_id, last_interval_ms,
3741                            _words_scanned,
3742                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3743                            _refs_reached,
3744                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3745   }
3746 #endif // _MARKING_STATS_
3747 
3748   // (4) We check whether we should yield. If we have to, then we abort.
3749   if (SuspendibleThreadSet::should_yield()) {
3750     // We should yield. To do this we abort the task. The caller is
3751     // responsible for yielding.
3752     set_has_aborted();
3753     statsOnly( ++_aborted_yield );
3754     return;
3755   }
3756 
3757   // (5) We check whether we've reached our time quota. If we have,
3758   // then we abort.
3759   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3760   if (elapsed_time_ms > _time_target_ms) {
3761     set_has_aborted();
3762     _has_timed_out = true;
3763     statsOnly( ++_aborted_timed_out );
3764     return;
3765   }
3766 
3767   // (6) Finally, we check whether there are enough completed SATB
3768   // buffers available for processing. If there are, we abort.
3769   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3770   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3771     if (_cm->verbose_low()) {
3772       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3773                              _worker_id);
3774     }
3775     // we do need to process SATB buffers, we'll abort and restart
3776     // the marking task to do so
3777     set_has_aborted();
3778     statsOnly( ++_aborted_satb );
3779     return;
3780   }
3781 }
3782 
3783 void CMTask::recalculate_limits() {
3784   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3785   _words_scanned_limit = _real_words_scanned_limit;
3786 
3787   _real_refs_reached_limit = _refs_reached + refs_reached_period;
3788   _refs_reached_limit = _real_refs_reached_limit;
3789 }
3790 
3791 void CMTask::decrease_limits() {
3792   // This is called when we believe that we're going to do an infrequent
3793   // operation which will increase the per-byte scanned cost (i.e. move
3794   // entries to/from the global stack). It basically tries to decrease the
3795   // scanning limit so that the clock is called earlier.
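  // A worked illustration (with made-up numbers, not the actual
  // constants): suppose words_scanned_period were 12k words and
  // recalculate_limits() had just run, so that _words_scanned_limit ==
  // _words_scanned + 12k. The subtraction below pulls the limit back to
  // _words_scanned + 3k, i.e. the next regular_clock_call() triggers
  // after roughly a quarter of the usual scanning budget.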
3796 
3797   if (_cm->verbose_medium()) {
3798     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3799   }
3800 
3801   _words_scanned_limit = _real_words_scanned_limit -
3802     3 * words_scanned_period / 4;
3803   _refs_reached_limit = _real_refs_reached_limit -
3804     3 * refs_reached_period / 4;
3805 }
3806 
3807 void CMTask::move_entries_to_global_stack() {
3808   // local array where we'll store the entries that will be popped
3809   // from the local queue
3810   oop buffer[global_stack_transfer_size];
3811 
3812   int n = 0;
3813   oop obj;
3814   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3815     buffer[n] = obj;
3816     ++n;
3817   }
3818 
3819   if (n > 0) {
3820     // we popped at least one entry from the local queue
3821 
3822     statsOnly( ++_global_transfers_to; _local_pops += n );
3823 
3824     if (!_cm->mark_stack_push(buffer, n)) {
3825       if (_cm->verbose_low()) {
3826         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3827                                _worker_id);
3828       }
3829       set_has_aborted();
3830     } else {
3831       // the transfer was successful
3832 
3833       if (_cm->verbose_medium()) {
3834         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3835                                _worker_id, n);
3836       }
3837       statsOnly( int tmp_size = _cm->mark_stack_size();
3838                  if (tmp_size > _global_max_size) {
3839                    _global_max_size = tmp_size;
3840                  }
3841                  _global_pushes += n );
3842     }
3843   }
3844 
3845   // this operation was quite expensive, so decrease the limits
3846   decrease_limits();
3847 }
3848 
3849 void CMTask::get_entries_from_global_stack() {
3850   // local array where we'll store the entries that will be popped
3851   // from the global stack.
3852   oop buffer[global_stack_transfer_size];
3853   int n;
3854   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3855   assert(n <= global_stack_transfer_size,
3856          "we should not pop more than the given limit");
3857   if (n > 0) {
3858     // yes, we did actually pop at least one entry
3859 
3860     statsOnly( ++_global_transfers_from; _global_pops += n );
3861     if (_cm->verbose_medium()) {
3862       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3863                              _worker_id, n);
3864     }
3865     for (int i = 0; i < n; ++i) {
3866       bool success = _task_queue->push(buffer[i]);
3867       // We only call this when the local queue is empty or under a
3868       // given target limit. So, we do not expect this push to fail.
3869       assert(success, "invariant");
3870     }
3871 
3872     statsOnly( int tmp_size = _task_queue->size();
3873                if (tmp_size > _local_max_size) {
3874                  _local_max_size = tmp_size;
3875                }
3876                _local_pushes += n );
3877   }
3878 
3879   // this operation was quite expensive, so decrease the limits
3880   decrease_limits();
3881 }
3882 
3883 void CMTask::drain_local_queue(bool partially) {
3884   if (has_aborted()) return;
3885 
3886   // Decide what the target size is, depending on whether we're going to
3887   // drain it partially (so that other tasks can steal if they run out
3888   // of things to do) or totally (at the very end).
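  // For instance (hypothetical numbers; the queue capacity and the
  // default of GCDrainStackTargetSize vary by build): with a local
  // queue capacity of 16k entries and GCDrainStackTargetSize at 64,
  // a partial drain below stops once at most MIN2(16384/3, 64) == 64
  // entries are left on the local queue for other tasks to steal.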
3889   size_t target_size;
3890   if (partially) {
3891     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3892   } else {
3893     target_size = 0;
3894   }
3895 
3896   if (_task_queue->size() > target_size) {
3897     if (_cm->verbose_high()) {
3898       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3899                              _worker_id, target_size);
3900     }
3901 
3902     oop obj;
3903     bool ret = _task_queue->pop_local(obj);
3904     while (ret) {
3905       statsOnly( ++_local_pops );
3906 
3907       if (_cm->verbose_high()) {
3908         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3909                                p2i((void*) obj));
3910       }
3911 
3912       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3913       assert(!_g1h->is_on_master_free_list(
3914                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3915 
3916       scan_object(obj);
3917 
3918       if (_task_queue->size() <= target_size || has_aborted()) {
3919         ret = false;
3920       } else {
3921         ret = _task_queue->pop_local(obj);
3922       }
3923     }
3924 
3925     if (_cm->verbose_high()) {
3926       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3927                              _worker_id, _task_queue->size());
3928     }
3929   }
3930 }
3931 
3932 void CMTask::drain_global_stack(bool partially) {
3933   if (has_aborted()) return;
3934 
3935   // We have a policy to drain the local queue before we attempt to
3936   // drain the global stack.
3937   assert(partially || _task_queue->size() == 0, "invariant");
3938 
3939   // Decide what the target size is, depending on whether we're going to
3940   // drain it partially (so that other tasks can steal if they run out
3941   // of things to do) or totally (at the very end). Notice that,
3942   // because we move entries from the global stack in chunks or
3943   // because another task might be doing the same, we might in fact
3944   // drop below the target. But this is not a problem.
3945   size_t target_size;
3946   if (partially) {
3947     target_size = _cm->partial_mark_stack_size_target();
3948   } else {
3949     target_size = 0;
3950   }
3951 
3952   if (_cm->mark_stack_size() > target_size) {
3953     if (_cm->verbose_low()) {
3954       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3955                              _worker_id, target_size);
3956     }
3957 
3958     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3959       get_entries_from_global_stack();
3960       drain_local_queue(partially);
3961     }
3962 
3963     if (_cm->verbose_low()) {
3964       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3965                              _worker_id, _cm->mark_stack_size());
3966     }
3967   }
3968 }
3969 
3970 // SATB Queue has several assumptions on whether to call the par or
3971 // non-par versions of the methods. This is why some of the code is
3972 // replicated. We should really get rid of the single-threaded version
3973 // of the code to simplify things.
3974 void CMTask::drain_satb_buffers() {
3975   if (has_aborted()) return;
3976 
3977   // We set this so that the regular clock knows that we're in the
3978   // middle of draining buffers and doesn't set the abort flag when it
3979   // notices that SATB buffers are available for draining. It'd be
3980   // very counterproductive if it did that.
:-)
3981   _draining_satb_buffers = true;
3982 
3983   CMObjectClosure oc(this);
3984   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3985   if (G1CollectedHeap::use_parallel_gc_threads()) {
3986     satb_mq_set.set_par_closure(_worker_id, &oc);
3987   } else {
3988     satb_mq_set.set_closure(&oc);
3989   }
3990 
3991   // This keeps claiming and applying the closure to completed buffers
3992   // until we run out of buffers or we need to abort.
3993   if (G1CollectedHeap::use_parallel_gc_threads()) {
3994     while (!has_aborted() &&
3995            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3996       if (_cm->verbose_medium()) {
3997         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3998       }
3999       statsOnly( ++_satb_buffers_processed );
4000       regular_clock_call();
4001     }
4002   } else {
4003     while (!has_aborted() &&
4004            satb_mq_set.apply_closure_to_completed_buffer()) {
4005       if (_cm->verbose_medium()) {
4006         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4007       }
4008       statsOnly( ++_satb_buffers_processed );
4009       regular_clock_call();
4010     }
4011   }
4012 
4013   _draining_satb_buffers = false;
4014 
4015   assert(has_aborted() ||
4016          concurrent() ||
4017          satb_mq_set.completed_buffers_num() == 0, "invariant");
4018 
4019   if (G1CollectedHeap::use_parallel_gc_threads()) {
4020     satb_mq_set.set_par_closure(_worker_id, NULL);
4021   } else {
4022     satb_mq_set.set_closure(NULL);
4023   }
4024 
4025   // again, this was a potentially expensive operation, so decrease the
4026   // limits to get the regular clock call early
4027   decrease_limits();
4028 }
4029 
4030 void CMTask::print_stats() {
4031   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
4032                          _worker_id, _calls);
4033   gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4034                          _elapsed_time_ms, _termination_time_ms);
4035   gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4036                          _step_times_ms.num(), _step_times_ms.avg(),
4037                          _step_times_ms.sd());
4038   gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4039                          _step_times_ms.maximum(), _step_times_ms.sum());
4040 
4041 #if _MARKING_STATS_
4042   gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4043                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4044                          _all_clock_intervals_ms.sd());
4045   gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4046                          _all_clock_intervals_ms.maximum(),
4047                          _all_clock_intervals_ms.sum());
4048   gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4049                          _clock_due_to_scanning, _clock_due_to_marking);
4050   gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4051                          _objs_scanned, _objs_found_on_bitmap);
4052   gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4053                          _local_pushes, _local_pops, _local_max_size);
4054   gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4055                          _global_pushes, _global_pops, _global_max_size);
4056   gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4057                          _global_transfers_to, _global_transfers_from);
4058   gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
4059   gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4060   gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4061                          _steal_attempts, _steals);
4062   gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4063   gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4064                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4065   gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4066                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4067 #endif // _MARKING_STATS_
4068 }
4069 
4070 /*****************************************************************************
4071 
4072     The do_marking_step(time_target_ms, ...) method is the building
4073     block of the parallel marking framework. It can be called in parallel
4074     with other invocations of do_marking_step() on different tasks
4075     (but only one per task, obviously) and concurrently with the
4076     mutator threads, or during remark, hence it eliminates the need
4077     for two versions of the code. When called during remark, it will
4078     pick up from where the task left off during the concurrent marking
4079     phase. Interestingly, tasks are claimable during evacuation
4080     pauses too, since do_marking_step() ensures that it aborts before
4081     it needs to yield.
4082 
4083     The data structures that it uses to do marking work are the
4084     following:
4085 
4086     (1) Marking Bitmap. If there are gray objects that appear only
4087     on the bitmap (this happens either when dealing with an overflow
4088     or when the initial marking phase has simply marked the roots
4089     and didn't push them on the stack), then tasks claim heap
4090     regions whose bitmap they then scan to find gray objects. A
4091     global finger indicates where the end of the last claimed region
4092     is. A local finger indicates how far into the region a task has
4093     scanned. The two fingers are used to determine how to gray an
4094     object (i.e. whether simply marking it is OK, as it will be
4095     visited by a task in the future, or whether it needs to be also
4096     pushed on a stack).
4097 
4098     (2) Local Queue. The local queue of the task which is accessed
4099     reasonably efficiently by the task. Other tasks can steal from
4100     it when they run out of work. Throughout the marking phase, a
4101     task attempts to keep its local queue short but not totally
4102     empty, so that entries are available for stealing by other
4103     tasks. Only when there is no more work will a task totally
4104     drain its local queue.
4105 
4106     (3) Global Mark Stack. This handles local queue overflow. During
4107     marking only sets of entries are moved between it and the local
4108     queues, as access to it requires a mutex and more fine-grained
4109     interaction with it, which might cause contention. If it
4110     overflows, then the marking phase should restart and iterate
4111     over the bitmap to identify gray objects. Throughout the marking
4112     phase, tasks attempt to keep the global mark stack at a small
4113     length but not totally empty, so that entries are available for
4114     popping by other tasks. Only when there is no more work will
4115     tasks totally drain the global mark stack.
4116 
4117     (4) SATB Buffer Queue. This is where completed SATB buffers are
4118     made available. Buffers are regularly removed from this queue
4119     and scanned for roots, so that the queue doesn't get too
4120     long. During remark, all completed buffers are processed, as
4121     well as the filled-in parts of any uncompleted buffers.
4122 
4123     The do_marking_step() method tries to abort when the time target
4124     has been reached. There are a few other cases when the
4125     do_marking_step() method also aborts:
4126 
4127     (1) When the marking phase has been aborted (after a Full GC).
4128 
4129     (2) When a global overflow (on the global stack) has been
4130     triggered.
Before the task aborts, it will actually sync up with
4131    the other tasks to ensure that all the marking data structures
4132    (local queues, stacks, fingers etc.) are re-initialized so that
4133    when do_marking_step() completes, the marking phase can
4134    immediately restart.
4135 
4136    (3) When enough completed SATB buffers are available. The
4137    do_marking_step() method only tries to drain SATB buffers right
4138    at the beginning. So, if enough buffers are available, the
4139    marking step aborts and the SATB buffers are processed at
4140    the beginning of the next invocation.
4141 
4142    (4) To yield. When we have to yield, we abort and do the yield
4143    right at the end of do_marking_step(). This saves us from a lot
4144    of hassle as, by yielding, we might allow a Full GC. If this
4145    happens then objects will be compacted underneath our feet, the
4146    heap might shrink, etc. We save checking for this by just
4147    aborting and doing the yield right at the end.
4148 
4149    From the above it follows that the do_marking_step() method should
4150    be called in a loop (or, otherwise, regularly) until it completes; a sketch of such a driver loop appears at the end of this comment.
4151 
4152    If a marking step completes without its has_aborted() flag being
4153    true, it means it has completed the current marking phase (and
4154    also all other marking tasks have done so and have all synced up).
4155 
4156    A method called regular_clock_call() is invoked "regularly" (in
4157    sub-ms intervals) throughout marking. It is this clock method that
4158    checks all the abort conditions which were mentioned above and
4159    decides when the task should abort. A work-based scheme is used to
4160    trigger this clock method: when the number of object words the
4161    marking phase has scanned or the number of references the marking
4162    phase has visited reach a given limit. Additional invocations of
4163    the clock method have been planted in a few other strategic places
4164    too. The initial reason for the clock method was to avoid calling
4165    vtime too regularly, as it is quite expensive. So, once it was in
4166    place, it was natural to piggy-back all the other conditions on it
4167    too and not constantly check them throughout the code.
4168 
4169    If do_termination is true then do_marking_step will enter its
4170    termination protocol.
4171 
4172    The value of is_serial must be true when do_marking_step is being
4173    called serially (i.e. by the VMThread) and do_marking_step should
4174    skip any synchronization in the termination and overflow code.
4175    Examples include the serial remark code and the serial reference
4176    processing closures.
4177 
4178    The value of is_serial must be false when do_marking_step is
4179    being called by any of the worker threads in a work gang.
4180    Examples include the concurrent marking code (CMMarkingTask),
4181    the MT remark code, and the MT reference processing closures.
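
   As a sketch only (these are not the actual call sites; task, cm and
   target_ms stand in for the real locals), a parallel caller is
   expected to drive this method along the following lines:

     do {
       task->do_marking_step(target_ms,
                             true  /* do_termination */,
                             false /* is_serial */);
       // yield to the suspendible thread set if requested, and re-check
       // the overflow / full-GC conditions before retrying
     } while (task->has_aborted() && !cm->has_aborted());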
4182 
4183  *****************************************************************************/
4184 
4185 void CMTask::do_marking_step(double time_target_ms,
4186                              bool do_termination,
4187                              bool is_serial) {
4188   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4189   assert(concurrent() == _cm->concurrent(), "they should be the same");
4190 
4191   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4192   assert(_task_queues != NULL, "invariant");
4193   assert(_task_queue != NULL, "invariant");
4194   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4195 
4196   assert(!_claimed,
4197          "only one thread should claim this task at any one time");
4198 
4199   // OK, this doesn't safeguard against all possible scenarios, as it is
4200   // possible for two threads to set the _claimed flag at the same
4201   // time. But it is only for debugging purposes anyway and it will
4202   // catch most problems.
4203   _claimed = true;
4204 
4205   _start_time_ms = os::elapsedVTime() * 1000.0;
4206   statsOnly( _interval_start_time_ms = _start_time_ms );
4207 
4208   // If do_stealing is true then do_marking_step will attempt to
4209   // steal work from the other CMTasks. It only makes sense to
4210   // enable stealing when the termination protocol is enabled
4211   // and do_marking_step() is not being called serially.
4212   bool do_stealing = do_termination && !is_serial;
4213 
4214   double diff_prediction_ms =
4215     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4216   _time_target_ms = time_target_ms - diff_prediction_ms;
4217 
4218   // set up the variables that are used in the work-based scheme to
4219   // call the regular clock method
4220   _words_scanned = 0;
4221   _refs_reached = 0;
4222   recalculate_limits();
4223 
4224   // clear all flags
4225   clear_has_aborted();
4226   _has_timed_out = false;
4227   _draining_satb_buffers = false;
4228 
4229   ++_calls;
4230 
4231   if (_cm->verbose_low()) {
4232     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4233                            "target = %1.2lfms >>>>>>>>>>",
4234                            _worker_id, _calls, _time_target_ms);
4235   }
4236 
4237   // Set up the bitmap and oop closures. Anything that uses them is
4238   // eventually called from this method, so it is OK to allocate these
4239   // statically.
4240   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4241   G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4242   set_cm_oop_closure(&cm_oop_closure);
4243 
4244   if (_cm->has_overflown()) {
4245     // This can happen if the mark stack overflows during a GC pause
4246     // and this task, after a yield point, restarts. We have to abort
4247     // as we need to get into the overflow protocol which happens
4248     // right at the end of this task.
4249     set_has_aborted();
4250   }
4251 
4252   // First drain any available SATB buffers. After this, we will not
4253   // look at SATB buffers before the next invocation of this method.
4254   // If enough completed SATB buffers are queued up, the regular clock
4255   // will abort this task so that it restarts.
4256   drain_satb_buffers();
4257   // ...then partially drain the local queue and the global stack
4258   drain_local_queue(true);
4259   drain_global_stack(true);
4260 
4261   do {
4262     if (!has_aborted() && _curr_region != NULL) {
4263       // This means that we're already holding on to a region.
4264       assert(_finger != NULL, "if region is not NULL, then the finger "
4265              "should not be NULL either");
4266 
4267       // We might have restarted this task after an evacuation pause
4268       // which might have evacuated the region we're holding on to
4269       // underneath our feet.
Let's read its limit again to make sure 4270 // that we do not iterate over a region of the heap that 4271 // contains garbage (update_region_limit() will also move 4272 // _finger to the start of the region if it is found empty). 4273 update_region_limit(); 4274 // We will start from _finger not from the start of the region, 4275 // as we might be restarting this task after aborting half-way 4276 // through scanning this region. In this case, _finger points to 4277 // the address where we last found a marked object. If this is a 4278 // fresh region, _finger points to start(). 4279 MemRegion mr = MemRegion(_finger, _region_limit); 4280 4281 if (_cm->verbose_low()) { 4282 gclog_or_tty->print_cr("[%u] we're scanning part " 4283 "["PTR_FORMAT", "PTR_FORMAT") " 4284 "of region "HR_FORMAT, 4285 _worker_id, p2i(_finger), p2i(_region_limit), 4286 HR_FORMAT_PARAMS(_curr_region)); 4287 } 4288 4289 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4290 "humongous regions should go around loop once only"); 4291 4292 // Some special cases: 4293 // If the memory region is empty, we can just give up the region. 4294 // If the current region is humongous then we only need to check 4295 // the bitmap for the bit associated with the start of the object, 4296 // scan the object if it's live, and give up the region. 4297 // Otherwise, let's iterate over the bitmap of the part of the region 4298 // that is left. 4299 // If the iteration is successful, give up the region. 4300 if (mr.is_empty()) { 4301 giveup_current_region(); 4302 regular_clock_call(); 4303 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4304 if (_nextMarkBitMap->isMarked(mr.start())) { 4305 // The object is marked - apply the closure 4306 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4307 bitmap_closure.do_bit(offset); 4308 } 4309 // Even if this task aborted while scanning the humongous object 4310 // we can (and should) give up the current region. 4311 giveup_current_region(); 4312 regular_clock_call(); 4313 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4314 giveup_current_region(); 4315 regular_clock_call(); 4316 } else { 4317 assert(has_aborted(), "currently the only way to do so"); 4318 // The only way to abort the bitmap iteration is to return 4319 // false from the do_bit() method. However, inside the 4320 // do_bit() method we move the _finger to point to the 4321 // object currently being looked at. So, if we bail out, we 4322 // have definitely set _finger to something non-null. 4323 assert(_finger != NULL, "invariant"); 4324 4325 // Region iteration was actually aborted. So now _finger 4326 // points to the address of the object we last scanned. If we 4327 // leave it there, when we restart this task, we will rescan 4328 // the object. It is easy to avoid this. We move the finger by 4329 // enough to point to the next possible object header (the 4330 // bitmap knows by how much we need to move it as it knows its 4331 // granularity). 4332 assert(_finger < _region_limit, "invariant"); 4333 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4334 // Check if bitmap iteration was aborted while scanning the last object 4335 if (new_finger >= _region_limit) { 4336 giveup_current_region(); 4337 } else { 4338 move_finger_to(new_finger); 4339 } 4340 } 4341 } 4342 // At this point we have either completed iterating over the 4343 // region we were holding on to, or we have aborted. 
4344 
4345     // We then partially drain the local queue and the global stack.
4346     // (Do we really need this?)
4347     drain_local_queue(true);
4348     drain_global_stack(true);
4349 
4350     // Read the note on the claim_region() method on why it might
4351     // return NULL with potentially more regions available for
4352     // claiming and why we have to check out_of_regions() to determine
4353     // whether we're done or not.
4354     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4355       // We are going to try to claim a new region. We should have
4356       // given up on the previous one.
4357       // Separated the asserts so that we know which one fires.
4358       assert(_curr_region == NULL, "invariant");
4359       assert(_finger == NULL, "invariant");
4360       assert(_region_limit == NULL, "invariant");
4361       if (_cm->verbose_low()) {
4362         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4363       }
4364       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4365       if (claimed_region != NULL) {
4366         // Yes, we managed to claim one
4367         statsOnly( ++_regions_claimed );
4368 
4369         if (_cm->verbose_low()) {
4370           gclog_or_tty->print_cr("[%u] we successfully claimed "
4371                                  "region "PTR_FORMAT,
4372                                  _worker_id, p2i(claimed_region));
4373         }
4374 
4375         setup_for_region(claimed_region);
4376         assert(_curr_region == claimed_region, "invariant");
4377       }
4378       // It is important to call the regular clock here. It might take
4379       // a while to claim a region if, for example, we hit a large
4380       // block of empty regions. So we need to call the regular clock
4381       // method once round the loop to make sure it's called
4382       // frequently enough.
4383       regular_clock_call();
4384     }
4385 
4386     if (!has_aborted() && _curr_region == NULL) {
4387       assert(_cm->out_of_regions(),
4388              "at this point we should be out of regions");
4389     }
4390   } while ( _curr_region != NULL && !has_aborted());
4391 
4392   if (!has_aborted()) {
4393     // We cannot check whether the global stack is empty, since other
4394     // tasks might be pushing objects to it concurrently.
4395     assert(_cm->out_of_regions(),
4396            "at this point we should be out of regions");
4397 
4398     if (_cm->verbose_low()) {
4399       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4400     }
4401 
4402     // Try to reduce the number of available SATB buffers so that
4403     // remark has less work to do.
4404     drain_satb_buffers();
4405   }
4406 
4407   // Since we've done everything else, we can now totally drain the
4408   // local queue and global stack.
4409   drain_local_queue(false);
4410   drain_global_stack(false);
4411 
4412   // Attempt at work stealing from other tasks' queues.
4413   if (do_stealing && !has_aborted()) {
4414     // We have not aborted. This means that we have finished all that
4415     // we could. Let's try to do some stealing...
4416 
4417     // We cannot check whether the global stack is empty, since other
4418     // tasks might be pushing objects to it concurrently.
4419     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4420            "only way to reach here");
4421 
4422     if (_cm->verbose_low()) {
4423       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4424     }
4425 
4426     while (!has_aborted()) {
4427       oop obj;
4428       statsOnly( ++_steal_attempts );
4429 
4430       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4431         if (_cm->verbose_medium()) {
4432           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4433                                  _worker_id, p2i((void*) obj));
4434         }
4435 
4436         statsOnly( ++_steals );
4437 
4438         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4439                "any stolen object should be marked");
4440         scan_object(obj);
4441 
4442         // And since we're towards the end, let's totally drain the
4443         // local queue and global stack.
4444         drain_local_queue(false);
4445         drain_global_stack(false);
4446       } else {
4447         break;
4448       }
4449     }
4450   }
4451 
4452   // If we are about to wrap up and go into termination, check if we
4453   // should raise the overflow flag.
4454   if (do_termination && !has_aborted()) {
4455     if (_cm->force_overflow()->should_force()) {
4456       _cm->set_has_overflown();
4457       regular_clock_call();
4458     }
4459   }
4460 
4461   // We still haven't aborted. Now, let's try to get into the
4462   // termination protocol.
4463   if (do_termination && !has_aborted()) {
4464     // We cannot check whether the global stack is empty, since other
4465     // tasks might be concurrently pushing objects on it.
4466     // Separated the asserts so that we know which one fires.
4467     assert(_cm->out_of_regions(), "only way to reach here");
4468     assert(_task_queue->size() == 0, "only way to reach here");
4469 
4470     if (_cm->verbose_low()) {
4471       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4472     }
4473 
4474     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4475 
4476     // The CMTask class also extends the TerminatorTerminator class,
4477     // hence its should_exit_termination() method will also decide
4478     // whether to exit the termination protocol or not.
4479     bool finished = (is_serial ||
4480                      _cm->terminator()->offer_termination(this));
4481     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4482     _termination_time_ms +=
4483       termination_end_time_ms - _termination_start_time_ms;
4484 
4485     if (finished) {
4486       // We're all done.
4487 
4488       if (_worker_id == 0) {
4489         // let's allow task 0 to do this
4490         if (concurrent()) {
4491           assert(_cm->concurrent_marking_in_progress(), "invariant");
4492           // we need to set this to false before the next
4493           // safepoint. This way we ensure that the marking phase
4494           // doesn't observe any more heap expansions.
4495           _cm->clear_concurrent_marking_in_progress();
4496         }
4497       }
4498 
4499       // We can now guarantee that the global stack is empty, since
4500       // all other tasks have finished. We separated the guarantees so
4501       // that, if a condition is false, we can immediately find out
4502       // which one.
4503       guarantee(_cm->out_of_regions(), "only way to reach here");
4504       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4505       guarantee(_task_queue->size() == 0, "only way to reach here");
4506       guarantee(!_cm->has_overflown(), "only way to reach here");
4507       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4508 
4509       if (_cm->verbose_low()) {
4510         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4511       }
4512     } else {
4513       // Apparently there's more work to do. Let's abort this task; the
4514       // caller will restart it and we can hopefully find more things to do.
4515 4516 if (_cm->verbose_low()) { 4517 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4518 _worker_id); 4519 } 4520 4521 set_has_aborted(); 4522 statsOnly( ++_aborted_termination ); 4523 } 4524 } 4525 4526 // Mainly for debugging purposes to make sure that a pointer to the 4527 // closure which was statically allocated in this frame doesn't 4528 // escape it by accident. 4529 set_cm_oop_closure(NULL); 4530 double end_time_ms = os::elapsedVTime() * 1000.0; 4531 double elapsed_time_ms = end_time_ms - _start_time_ms; 4532 // Update the step history. 4533 _step_times_ms.add(elapsed_time_ms); 4534 4535 if (has_aborted()) { 4536 // The task was aborted for some reason. 4537 4538 statsOnly( ++_aborted ); 4539 4540 if (_has_timed_out) { 4541 double diff_ms = elapsed_time_ms - _time_target_ms; 4542 // Keep statistics of how well we did with respect to hitting 4543 // our target only if we actually timed out (if we aborted for 4544 // other reasons, then the results might get skewed). 4545 _marking_step_diffs_ms.add(diff_ms); 4546 } 4547 4548 if (_cm->has_overflown()) { 4549 // This is the interesting one. We aborted because a global 4550 // overflow was raised. This means we have to restart the 4551 // marking phase and start iterating over regions. However, in 4552 // order to do this we have to make sure that all tasks stop 4553 // what they are doing and re-initialize in a safe manner. We 4554 // will achieve this with the use of two barrier sync points. 4555 4556 if (_cm->verbose_low()) { 4557 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4558 } 4559 4560 if (!is_serial) { 4561 // We only need to enter the sync barrier if being called 4562 // from a parallel context 4563 _cm->enter_first_sync_barrier(_worker_id); 4564 4565 // When we exit this sync barrier we know that all tasks have 4566 // stopped doing marking work. So, it's now safe to 4567 // re-initialize our data structures. At the end of this method, 4568 // task 0 will clear the global data structures. 4569 } 4570 4571 statsOnly( ++_aborted_overflow ); 4572 4573 // We clear the local state of this task... 4574 clear_region_fields(); 4575 4576 if (!is_serial) { 4577 // ...and enter the second barrier. 4578 _cm->enter_second_sync_barrier(_worker_id); 4579 } 4580 // At this point, if we're during the concurrent phase of 4581 // marking, everything has been re-initialized and we're 4582 // ready to restart. 
4583 } 4584 4585 if (_cm->verbose_low()) { 4586 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4587 "elapsed = %1.2lfms <<<<<<<<<<", 4588 _worker_id, _time_target_ms, elapsed_time_ms); 4589 if (_cm->has_aborted()) { 4590 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4591 _worker_id); 4592 } 4593 } 4594 } else { 4595 if (_cm->verbose_low()) { 4596 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4597 "elapsed = %1.2lfms <<<<<<<<<<", 4598 _worker_id, _time_target_ms, elapsed_time_ms); 4599 } 4600 } 4601 4602 _claimed = false; 4603 } 4604 4605 CMTask::CMTask(uint worker_id, 4606 ConcurrentMark* cm, 4607 size_t* marked_bytes, 4608 BitMap* card_bm, 4609 CMTaskQueue* task_queue, 4610 CMTaskQueueSet* task_queues) 4611 : _g1h(G1CollectedHeap::heap()), 4612 _worker_id(worker_id), _cm(cm), 4613 _claimed(false), 4614 _nextMarkBitMap(NULL), _hash_seed(17), 4615 _task_queue(task_queue), 4616 _task_queues(task_queues), 4617 _cm_oop_closure(NULL), 4618 _marked_bytes_array(marked_bytes), 4619 _card_bm(card_bm) { 4620 guarantee(task_queue != NULL, "invariant"); 4621 guarantee(task_queues != NULL, "invariant"); 4622 4623 statsOnly( _clock_due_to_scanning = 0; 4624 _clock_due_to_marking = 0 ); 4625 4626 _marking_step_diffs_ms.add(0.5); 4627 } 4628 4629 // These are formatting macros that are used below to ensure 4630 // consistent formatting. The *_H_* versions are used to format the 4631 // header for a particular value and they should be kept consistent 4632 // with the corresponding macro. Also note that most of the macros add 4633 // the necessary white space (as a prefix) which makes them a bit 4634 // easier to compose. 4635 4636 // All the output lines are prefixed with this string to be able to 4637 // identify them easily in a large log file. 4638 #define G1PPRL_LINE_PREFIX "###" 4639 4640 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4641 #ifdef _LP64 4642 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4643 #else // _LP64 4644 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4645 #endif // _LP64 4646 4647 // For per-region info 4648 #define G1PPRL_TYPE_FORMAT " %-4s" 4649 #define G1PPRL_TYPE_H_FORMAT " %4s" 4650 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4651 #define G1PPRL_BYTE_H_FORMAT " %9s" 4652 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4653 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4654 4655 // For summary info 4656 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4657 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4658 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4659 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4660 4661 G1PrintRegionLivenessInfoClosure:: 4662 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4663 : _out(out), 4664 _total_used_bytes(0), _total_capacity_bytes(0), 4665 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4666 _hum_used_bytes(0), _hum_capacity_bytes(0), 4667 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4668 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4669 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4670 MemRegion g1_committed = g1h->g1_committed(); 4671 MemRegion g1_reserved = g1h->g1_reserved(); 4672 double now = os::elapsedTime(); 4673 4674 // Print the header of the output. 
4675   _out->cr();
4676   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4677   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4678                  G1PPRL_SUM_ADDR_FORMAT("committed")
4679                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4680                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4681                  p2i(g1_committed.start()), p2i(g1_committed.end()),
4682                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4683                  HeapRegion::GrainBytes);
4684   _out->print_cr(G1PPRL_LINE_PREFIX);
4685   _out->print_cr(G1PPRL_LINE_PREFIX
4686                  G1PPRL_TYPE_H_FORMAT
4687                  G1PPRL_ADDR_BASE_H_FORMAT
4688                  G1PPRL_BYTE_H_FORMAT
4689                  G1PPRL_BYTE_H_FORMAT
4690                  G1PPRL_BYTE_H_FORMAT
4691                  G1PPRL_DOUBLE_H_FORMAT
4692                  G1PPRL_BYTE_H_FORMAT
4693                  G1PPRL_BYTE_H_FORMAT,
4694                  "type", "address-range",
4695                  "used", "prev-live", "next-live", "gc-eff",
4696                  "remset", "code-roots");
4697   _out->print_cr(G1PPRL_LINE_PREFIX
4698                  G1PPRL_TYPE_H_FORMAT
4699                  G1PPRL_ADDR_BASE_H_FORMAT
4700                  G1PPRL_BYTE_H_FORMAT
4701                  G1PPRL_BYTE_H_FORMAT
4702                  G1PPRL_BYTE_H_FORMAT
4703                  G1PPRL_DOUBLE_H_FORMAT
4704                  G1PPRL_BYTE_H_FORMAT
4705                  G1PPRL_BYTE_H_FORMAT,
4706                  "", "",
4707                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4708                  "(bytes)", "(bytes)");
4709 }
4710 
4711 // It takes as a parameter a reference to one of the _hum_* fields,
4712 // deduces the corresponding value for a region in a humongous region
4713 // series (either the region size, or what's left if the _hum_* field
4714 // is < the region size), and updates the _hum_* field accordingly.
4715 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4716   size_t bytes = 0;
4717   // The > 0 check is to deal with the prev and next live bytes which
4718   // could be 0.
4719   if (*hum_bytes > 0) {
4720     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4721     *hum_bytes -= bytes;
4722   }
4723   return bytes;
4724 }
4725 
4726 // It deduces the values for a region in a humongous region series
4727 // from the _hum_* fields and updates those accordingly. It assumes
4728 // that the _hum_* fields have already been set up from the "starts
4729 // humongous" region and we visit the regions in address order.
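// For example (a sketch assuming 1 MB regions): if the "starts
// humongous" region of a 3.5 MB series reports 3.5 MB used, the fields
// are seeded with 3.5 MB; the HUMS region and the first two HUMC
// regions each deduce MIN2(GrainBytes, remaining) = 1 MB, the last
// HUMC region gets the remaining 0.5 MB, and the fields drop back to
// zero, ready for the next series.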
// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  size_t remset_bytes = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, p2i(bottom), p2i(end),
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                 remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add the static memory usages to the remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() +
                         HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}
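// Typical usage (a minimal sketch, not part of the original sources,
// assuming the usual HeapRegionClosure iteration entry point on
// G1CollectedHeap): construct the closure, walk every region once so
// doHeapRegion() prints one line per region, and let the destructor
// emit the summary footer when the closure goes out of scope.
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);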