/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/g1StringDedup.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionManager.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSet.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/allocation.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "memory/strongRootsScope.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _suspendible(suspendible), _hrclaimer(n_workers) {}

  void work(uint worker_id) {
    if (_suspendible) {
      SuspendibleThreadSet::join();
    }
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
    if (_suspendible) {
      SuspendibleThreadSet::leave();
    }
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
}

void CMBitMap::markRange(MemRegion mr) {
  // Clip the given region to the area covered by the bitmap.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  // Clip the given region to the area covered by the bitmap.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
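
// The global mark stack used by concurrent marking. It is a
// fixed-capacity stack of oops backed by its own virtual-memory area.
// Concurrent workers push entries with a CAS on _index (see par_push()
// below); if the stack fills up, the _overflow flag is raised and
// marking later restarts, expanding the stack first if
// set_should_expand() recorded an overflow.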
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up the existing stack until we have managed to
  // get the doubled capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with the old stack
    _virtual_space.release();
    // Reinitialize virtual space for the new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue with the current capacity.
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int  ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false),  _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

// Scale down the number of parallel GC threads to roughly a quarter,
// rounded, using at least one thread.
uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
                                              (double) os::processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
    FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = (uint) ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new FlexibleWorkGang("G1 Marker",
       _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in an STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // We need this to make sure that the flag is on during the evac
  // pause with the initial mark piggy-backed on it.
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in an STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // We can use the region's orig_end() for the limit and the comparison value
    // as it always contains the "real" end of the region that never changes and
    // has no side effects.
    // Due to the latter, there can also be no problem with the compiler generating
    // reloads of the orig_end() call.
    HeapWord* end = r->orig_end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->is_continues_humongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in an STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or an evacuation
 * pause could occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        _cm->clear_has_overflown();

        _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
    "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  // Don't set _n_par_threads because it affects MT in process_roots()
  // and the decisions on that MT processing are made elsewhere.
  assert(_parallel_workers->active_workers() > 0, "Should have been set");
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public GCTraceTime {
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
        G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->is_continues_humongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->is_starts_humongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in the heap, obj_end
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.
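// For each region it recomputes the expected liveness data with
// CalcLiveObjectsClosure and counts a failure whenever a bit that is
// set in the expected region or card bitmap is clear in the actual one.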

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrm_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.
1643 1644 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1645 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1646 1647 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1648 expected = _exp_card_bm->at(i); 1649 actual = _card_bm->at(i); 1650 1651 if (expected && !actual) { 1652 if (_verbose) { 1653 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1654 "expected: %s, actual: %s", 1655 hr->hrm_index(), i, 1656 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1657 } 1658 failures += 1; 1659 } 1660 } 1661 1662 if (failures > 0 && _verbose) { 1663 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1664 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1665 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), 1666 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1667 } 1668 1669 _failures += failures; 1670 1671 // We could stop iteration over the heap when we 1672 // find the first violating region by returning true. 1673 return false; 1674 } 1675 }; 1676 1677 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1678 protected: 1679 G1CollectedHeap* _g1h; 1680 ConcurrentMark* _cm; 1681 BitMap* _actual_region_bm; 1682 BitMap* _actual_card_bm; 1683 1684 uint _n_workers; 1685 1686 BitMap* _expected_region_bm; 1687 BitMap* _expected_card_bm; 1688 1689 int _failures; 1690 bool _verbose; 1691 1692 HeapRegionClaimer _hrclaimer; 1693 1694 public: 1695 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1696 BitMap* region_bm, BitMap* card_bm, 1697 BitMap* expected_region_bm, BitMap* expected_card_bm) 1698 : AbstractGangTask("G1 verify final counting"), 1699 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1700 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1701 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1702 _failures(0), _verbose(false), 1703 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1704 assert(VerifyDuringGC, "don't call this otherwise"); 1705 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1706 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1707 1708 _verbose = _cm->verbose_medium(); 1709 } 1710 1711 void work(uint worker_id) { 1712 assert(worker_id < _n_workers, "invariant"); 1713 1714 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1715 _actual_region_bm, _actual_card_bm, 1716 _expected_region_bm, 1717 _expected_card_bm, 1718 _verbose); 1719 1720 _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer); 1721 1722 Atomic::add(verify_cl.failures(), &_failures); 1723 } 1724 1725 int failures() const { return _failures; } 1726 }; 1727 1728 // Closure that finalizes the liveness counting data. 1729 // Used during the cleanup pause. 1730 // Sets the bits corresponding to the interval [NTAMS, top] 1731 // (which contains the implicitly live objects) in the 1732 // card liveness bitmap. Also sets the bit for each region, 1733 // containing live data, in the region liveness bitmap. 1734 1735 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1736 public: 1737 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1738 BitMap* region_bm, 1739 BitMap* card_bm) : 1740 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1741 1742 bool doHeapRegion(HeapRegion* hr) { 1743 1744 if (hr->is_continues_humongous()) { 1745 // We will ignore these here and process them when their 1746 // associated "starts humongous" region is processed (see 1747 // set_bit_for_heap_region()). 
Note that we cannot rely on their 1748 // associated "starts humongous" region to have their bit set to 1749 // 1 since, due to the region chunking in the parallel region 1750 // iteration, a "continues humongous" region might be visited 1751 // before its associated "starts humongous". 1752 return false; 1753 } 1754 1755 HeapWord* ntams = hr->next_top_at_mark_start(); 1756 HeapWord* top = hr->top(); 1757 1758 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1759 1760 // Mark the allocated-since-marking portion... 1761 if (ntams < top) { 1762 // This definitely means the region has live objects. 1763 set_bit_for_region(hr); 1764 1765 // Now set the bits in the card bitmap for [ntams, top) 1766 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1767 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1768 1769 // Note: if we're looking at the last region in heap - top 1770 // could be actually just beyond the end of the heap; end_idx 1771 // will then correspond to a (non-existent) card that is also 1772 // just beyond the heap. 1773 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1774 // end of object is not card aligned - increment to cover 1775 // all the cards spanned by the object 1776 end_idx += 1; 1777 } 1778 1779 assert(end_idx <= _card_bm->size(), 1780 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1781 end_idx, _card_bm->size())); 1782 assert(start_idx < _card_bm->size(), 1783 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1784 start_idx, _card_bm->size())); 1785 1786 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1787 } 1788 1789 // Set the bit for the region if it contains live data 1790 if (hr->next_marked_bytes() > 0) { 1791 set_bit_for_region(hr); 1792 } 1793 1794 return false; 1795 } 1796 }; 1797 1798 class G1ParFinalCountTask: public AbstractGangTask { 1799 protected: 1800 G1CollectedHeap* _g1h; 1801 ConcurrentMark* _cm; 1802 BitMap* _actual_region_bm; 1803 BitMap* _actual_card_bm; 1804 1805 uint _n_workers; 1806 HeapRegionClaimer _hrclaimer; 1807 1808 public: 1809 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1810 : AbstractGangTask("G1 final counting"), 1811 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1812 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1813 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1814 } 1815 1816 void work(uint worker_id) { 1817 assert(worker_id < _n_workers, "invariant"); 1818 1819 FinalCountDataUpdateClosure final_update_cl(_g1h, 1820 _actual_region_bm, 1821 _actual_card_bm); 1822 1823 _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer); 1824 } 1825 }; 1826 1827 class G1ParNoteEndTask; 1828 1829 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1830 G1CollectedHeap* _g1; 1831 size_t _max_live_bytes; 1832 uint _regions_claimed; 1833 size_t _freed_bytes; 1834 FreeRegionList* _local_cleanup_list; 1835 HeapRegionSetCount _old_regions_removed; 1836 HeapRegionSetCount _humongous_regions_removed; 1837 HRRSCleanupTask* _hrrs_cleanup_task; 1838 double _claimed_region_time; 1839 double _max_region_time; 1840 1841 public: 1842 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1843 FreeRegionList* local_cleanup_list, 1844 HRRSCleanupTask* hrrs_cleanup_task) : 1845 _g1(g1), 1846 _max_live_bytes(0), _regions_claimed(0), 1847 _freed_bytes(0), 1848 _claimed_region_time(0.0), _max_region_time(0.0), 1849 _local_cleanup_list(local_cleanup_list), 1850 
_old_regions_removed(), 1851 _humongous_regions_removed(), 1852 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1853 1854 size_t freed_bytes() { return _freed_bytes; } 1855 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1856 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1857 1858 bool doHeapRegion(HeapRegion *hr) { 1859 if (hr->is_continues_humongous()) { 1860 return false; 1861 } 1862 // Regions were claimed in the FinalCount task via a separate 1863 // HeapRegionClaimer, so it is safe to claim them again here. 1864 _g1->reset_gc_time_stamps(hr); 1865 double start = os::elapsedTime(); 1866 _regions_claimed++; 1867 hr->note_end_of_marking(); 1868 _max_live_bytes += hr->max_live_bytes(); 1869 1870 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1871 _freed_bytes += hr->used(); 1872 hr->set_containing_set(NULL); 1873 if (hr->is_humongous()) { 1874 assert(hr->is_starts_humongous(), "we should only see starts humongous"); 1875 _humongous_regions_removed.increment(1u, hr->capacity()); 1876 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1877 } else { 1878 _old_regions_removed.increment(1u, hr->capacity()); 1879 _g1->free_region(hr, _local_cleanup_list, true); 1880 } 1881 } else { 1882 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1883 } 1884 1885 double region_time = (os::elapsedTime() - start); 1886 _claimed_region_time += region_time; 1887 if (region_time > _max_region_time) { 1888 _max_region_time = region_time; 1889 } 1890 return false; 1891 } 1892 1893 size_t max_live_bytes() { return _max_live_bytes; } 1894 uint regions_claimed() { return _regions_claimed; } 1895 double claimed_region_time_sec() { return _claimed_region_time; } 1896 double max_region_time_sec() { return _max_region_time; } 1897 }; 1898 1899 class G1ParNoteEndTask: public AbstractGangTask { 1900 friend class G1NoteEndOfConcMarkClosure; 1901 1902 protected: 1903 G1CollectedHeap* _g1h; 1904 size_t _max_live_bytes; 1905 size_t _freed_bytes; 1906 FreeRegionList* _cleanup_list; 1907 HeapRegionClaimer _hrclaimer; 1908 1909 public: 1910 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1911 AbstractGangTask("G1 note end"), _g1h(g1h), _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1912 } 1913 1914 void work(uint worker_id) { 1915 FreeRegionList local_cleanup_list("Local Cleanup List"); 1916 HRRSCleanupTask hrrs_cleanup_task; 1917 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1918 &hrrs_cleanup_task); 1919 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1920 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1921 1922 // Now update the lists 1923 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1924 { 1925 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1926 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1927 _max_live_bytes += g1_note_end.max_live_bytes(); 1928 _freed_bytes += g1_note_end.freed_bytes(); 1929 1930 // If we iterate over the global cleanup list at the end of 1931 // cleanup to do this printing we will not guarantee to only 1932 // generate output for the newly-reclaimed regions (the list 1933 // might not be empty at the beginning of cleanup; we might 1934 // still be working on its previous contents). So we do the 1935 // printing here, before we append the new regions to the global 1936 // cleanup list.
1937 1938 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1939 if (hr_printer->is_active()) { 1940 FreeRegionListIterator iter(&local_cleanup_list); 1941 while (iter.more_available()) { 1942 HeapRegion* hr = iter.get_next(); 1943 hr_printer->cleanup(hr); 1944 } 1945 } 1946 1947 _cleanup_list->add_ordered(&local_cleanup_list); 1948 assert(local_cleanup_list.is_empty(), "post-condition"); 1949 1950 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1951 } 1952 } 1953 size_t max_live_bytes() { return _max_live_bytes; } 1954 size_t freed_bytes() { return _freed_bytes; } 1955 }; 1956 1957 class G1ParScrubRemSetTask: public AbstractGangTask { 1958 protected: 1959 G1RemSet* _g1rs; 1960 BitMap* _region_bm; 1961 BitMap* _card_bm; 1962 HeapRegionClaimer _hrclaimer; 1963 1964 public: 1965 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1966 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1967 } 1968 1969 void work(uint worker_id) { 1970 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1971 } 1972 1973 }; 1974 1975 void ConcurrentMark::cleanup() { 1976 // world is stopped at this checkpoint 1977 assert(SafepointSynchronize::is_at_safepoint(), 1978 "world should be stopped"); 1979 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1980 1981 // If a full collection has happened, we shouldn't do this. 1982 if (has_aborted()) { 1983 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1984 return; 1985 } 1986 1987 g1h->verify_region_sets_optional(); 1988 1989 if (VerifyDuringGC) { 1990 HandleMark hm; // handle scope 1991 g1h->prepare_for_verify(); 1992 Universe::verify(VerifyOption_G1UsePrevMarking, 1993 " VerifyDuringGC:(before)"); 1994 } 1995 g1h->check_bitmaps("Cleanup Start"); 1996 1997 G1CollectorPolicy* g1p = g1h->g1_policy(); 1998 g1p->record_concurrent_mark_cleanup_start(); 1999 2000 double start = os::elapsedTime(); 2001 2002 HeapRegionRemSet::reset_for_cleanup_tasks(); 2003 2004 uint n_workers; 2005 2006 // Do counting once more with the world stopped for good measure. 2007 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2008 2009 g1h->set_par_threads(); 2010 n_workers = g1h->n_par_threads(); 2011 assert(g1h->n_par_threads() == n_workers, 2012 "Should not have been reset"); 2013 g1h->workers()->run_task(&g1_par_count_task); 2014 // Done with the parallel phase so reset to 0. 2015 g1h->set_par_threads(0); 2016 2017 if (VerifyDuringGC) { 2018 // Verify that the counting data accumulated during marking matches 2019 // that calculated by walking the marking bitmap. 2020 2021 // Bitmaps to hold expected values 2022 BitMap expected_region_bm(_region_bm.size(), true); 2023 BitMap expected_card_bm(_card_bm.size(), true); 2024 2025 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2026 &_region_bm, 2027 &_card_bm, 2028 &expected_region_bm, 2029 &expected_card_bm); 2030 2031 g1h->set_par_threads((int)n_workers); 2032 g1h->workers()->run_task(&g1_par_verify_task); 2033 // Done with the parallel phase so reset to 0. 
2034 g1h->set_par_threads(0); 2035 2036 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2037 } 2038 2039 size_t start_used_bytes = g1h->used(); 2040 g1h->set_marking_complete(); 2041 2042 double count_end = os::elapsedTime(); 2043 double this_final_counting_time = (count_end - start); 2044 _total_counting_time += this_final_counting_time; 2045 2046 if (G1PrintRegionLivenessInfo) { 2047 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2048 _g1h->heap_region_iterate(&cl); 2049 } 2050 2051 // Install newly created mark bitMap as "prev". 2052 swapMarkBitMaps(); 2053 2054 g1h->reset_gc_time_stamp(); 2055 2056 // Note end of marking in all heap regions. 2057 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 2058 g1h->set_par_threads((int)n_workers); 2059 g1h->workers()->run_task(&g1_par_note_end_task); 2060 g1h->set_par_threads(0); 2061 g1h->check_gc_time_stamps(); 2062 2063 if (!cleanup_list_is_empty()) { 2064 // The cleanup list is not empty, so we'll have to process it 2065 // concurrently. Notify anyone else that might be wanting free 2066 // regions that there will be more free regions coming soon. 2067 g1h->set_free_regions_coming(); 2068 } 2069 2070 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end() 2071 // call below, since it affects the metric by which we sort the heap regions. 2072 if (G1ScrubRemSets) { 2073 double rs_scrub_start = os::elapsedTime(); 2074 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers); 2075 g1h->set_par_threads((int)n_workers); 2076 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2077 g1h->set_par_threads(0); 2078 2079 double rs_scrub_end = os::elapsedTime(); 2080 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2081 _total_rs_scrub_time += this_rs_scrub_time; 2082 } 2083 2084 // this will also free any regions totally full of garbage objects, 2085 // and sort the regions. 2086 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2087 2088 // Statistics. 2089 double end = os::elapsedTime(); 2090 _cleanup_times.add((end - start) * 1000.0); 2091 2092 if (G1Log::fine()) { 2093 g1h->g1_policy()->print_heap_transition(start_used_bytes); 2094 } 2095 2096 // Clean up will have freed any regions completely full of garbage. 2097 // Update the soft reference policy with the new heap occupancy. 2098 Universe::update_heap_info_at_gc(); 2099 2100 if (VerifyDuringGC) { 2101 HandleMark hm; // handle scope 2102 g1h->prepare_for_verify(); 2103 Universe::verify(VerifyOption_G1UsePrevMarking, 2104 " VerifyDuringGC:(after)"); 2105 } 2106 2107 g1h->check_bitmaps("Cleanup End"); 2108 2109 g1h->verify_region_sets_optional(); 2110 2111 // We need to make this be a "collection" so any collection pause that 2112 // races with it goes around and waits for completeCleanup to finish. 2113 g1h->increment_total_collections(); 2114 2115 // Clean out dead classes and update Metaspace sizes. 2116 if (ClassUnloadingWithConcurrentMark) { 2117 ClassLoaderDataGraph::purge(); 2118 } 2119 MetaspaceGC::compute_new_size(); 2120 2121 // We reclaimed old regions so we should calculate the sizes to make 2122 // sure we update the old gen/space data.
2123 g1h->g1mm()->update_sizes(); 2124 g1h->allocation_context_stats().update_after_mark(); 2125 2126 g1h->trace_heap_after_concurrent_cycle(); 2127 } 2128 2129 void ConcurrentMark::completeCleanup() { 2130 if (has_aborted()) return; 2131 2132 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2133 2134 _cleanup_list.verify_optional(); 2135 FreeRegionList tmp_free_list("Tmp Free List"); 2136 2137 if (G1ConcRegionFreeingVerbose) { 2138 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2139 "cleanup list has %u entries", 2140 _cleanup_list.length()); 2141 } 2142 2143 // No one else should be accessing the _cleanup_list at this point, 2144 // so it is not necessary to take any locks 2145 while (!_cleanup_list.is_empty()) { 2146 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 2147 assert(hr != NULL, "Got NULL from a non-empty list"); 2148 hr->par_clear(); 2149 tmp_free_list.add_ordered(hr); 2150 2151 // Instead of adding one region at a time to the secondary_free_list, 2152 // we accumulate them in the local list and move them a few at a 2153 // time. This also cuts down on the number of notify_all() calls 2154 // we do during this process. We'll also append the local list when 2155 // _cleanup_list is empty (which means we just removed the last 2156 // region from the _cleanup_list). 2157 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2158 _cleanup_list.is_empty()) { 2159 if (G1ConcRegionFreeingVerbose) { 2160 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2161 "appending %u entries to the secondary_free_list, " 2162 "cleanup list still has %u entries", 2163 tmp_free_list.length(), 2164 _cleanup_list.length()); 2165 } 2166 2167 { 2168 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2169 g1h->secondary_free_list_add(&tmp_free_list); 2170 SecondaryFreeList_lock->notify_all(); 2171 } 2172 #ifndef PRODUCT 2173 if (G1StressConcRegionFreeing) { 2174 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2175 os::sleep(Thread::current(), (jlong) 1, false); 2176 } 2177 } 2178 #endif 2179 } 2180 } 2181 assert(tmp_free_list.is_empty(), "post-condition"); 2182 } 2183 2184 // Supporting Object and Oop closures for reference discovery 2185 // and processing during marking 2186 2187 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2188 HeapWord* addr = (HeapWord*)obj; 2189 return addr != NULL && 2190 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2191 } 2192 2193 // 'Keep Alive' oop closure used by both serial and parallel reference processing. 2194 // Uses the CMTask associated with a worker thread (for serial reference 2195 // processing the CMTask for worker 0 is used) to preserve (mark) and 2196 // trace referent objects. 2197 // 2198 // Using the CMTask and embedded local queues avoids having the worker 2199 // threads operating on the global mark stack. This reduces the risk 2200 // of overflowing the stack - which we would rather avoid at this late 2201 // stage. Also using the tasks' local queues removes the potential 2202 // of the workers interfering with each other that could occur if 2203 // operating on the global stack.
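// For reference: weakRefsWork() below instantiates this closure once,
// with task(0) and is_serial == true, for serial reference processing,
// while G1CMRefProcTaskProxy::work() creates one instance per worker,
// with is_serial == false, for multi-threaded processing.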
2204 2205 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2206 ConcurrentMark* _cm; 2207 CMTask* _task; 2208 int _ref_counter_limit; 2209 int _ref_counter; 2210 bool _is_serial; 2211 public: 2212 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2213 _cm(cm), _task(task), _is_serial(is_serial), 2214 _ref_counter_limit(G1RefProcDrainInterval) { 2215 assert(_ref_counter_limit > 0, "sanity"); 2216 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2217 _ref_counter = _ref_counter_limit; 2218 } 2219 2220 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2221 virtual void do_oop( oop* p) { do_oop_work(p); } 2222 2223 template <class T> void do_oop_work(T* p) { 2224 if (!_cm->has_overflown()) { 2225 oop obj = oopDesc::load_decode_heap_oop(p); 2226 if (_cm->verbose_high()) { 2227 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2228 "*"PTR_FORMAT" = "PTR_FORMAT, 2229 _task->worker_id(), p2i(p), p2i((void*) obj)); 2230 } 2231 2232 _task->deal_with_reference(obj); 2233 _ref_counter--; 2234 2235 if (_ref_counter == 0) { 2236 // We have dealt with _ref_counter_limit references, pushing them 2237 // and objects reachable from them on to the local stack (and 2238 // possibly the global stack). Call CMTask::do_marking_step() to 2239 // process these entries. 2240 // 2241 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2242 // there's nothing more to do (i.e. we're done with the entries that 2243 // were pushed as a result of the CMTask::deal_with_reference() calls 2244 // above) or we overflow. 2245 // 2246 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2247 // flag while there may still be some work to do. (See the comment at 2248 // the beginning of CMTask::do_marking_step() for those conditions - 2249 // one of which is reaching the specified time target.) It is only 2250 // when CMTask::do_marking_step() returns without setting the 2251 // has_aborted() flag that the marking step has completed. 2252 do { 2253 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2254 _task->do_marking_step(mark_step_duration_ms, 2255 false /* do_termination */, 2256 _is_serial); 2257 } while (_task->has_aborted() && !_cm->has_overflown()); 2258 _ref_counter = _ref_counter_limit; 2259 } 2260 } else { 2261 if (_cm->verbose_high()) { 2262 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2263 } 2264 } 2265 } 2266 }; 2267 2268 // 'Drain' oop closure used by both serial and parallel reference processing. 2269 // Uses the CMTask associated with a given worker thread (for serial 2270 // reference processing the CMtask for worker 0 is used). Calls the 2271 // do_marking_step routine, with an unbelievably large timeout value, 2272 // to drain the marking data structures of the remaining entries 2273 // added by the 'keep alive' oop closure above. 
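// The effectively infinite time target passed to do_marking_step()
// below (1000000000.0 ms) disables its time-based abort condition, so
// each call returns only once the stacks are drained, or when the step
// aborts for some other reason (e.g. the global mark stack overflows).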
2274 2275 class G1CMDrainMarkingStackClosure: public VoidClosure { 2276 ConcurrentMark* _cm; 2277 CMTask* _task; 2278 bool _is_serial; 2279 public: 2280 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2281 _cm(cm), _task(task), _is_serial(is_serial) { 2282 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2283 } 2284 2285 void do_void() { 2286 do { 2287 if (_cm->verbose_high()) { 2288 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s", 2289 _task->worker_id(), BOOL_TO_STR(_is_serial)); 2290 } 2291 2292 // We call CMTask::do_marking_step() to completely drain the local 2293 // and global marking stacks of entries pushed by the 'keep alive' 2294 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2295 // 2296 // CMTask::do_marking_step() is called in a loop, which we'll exit 2297 // if there's nothing more to do (i.e. we've completely drained the 2298 // entries that were pushed as a result of applying the 'keep alive' 2299 // closure to the entries on the discovered ref lists) or we overflow 2300 // the global marking stack. 2301 // 2302 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2303 // flag while there may still be some work to do. (See the comment at 2304 // the beginning of CMTask::do_marking_step() for those conditions - 2305 // one of which is reaching the specified time target.) It is only 2306 // when CMTask::do_marking_step() returns without setting the 2307 // has_aborted() flag that the marking step has completed. 2308 2309 _task->do_marking_step(1000000000.0 /* something very large */, 2310 true /* do_termination */, 2311 _is_serial); 2312 } while (_task->has_aborted() && !_cm->has_overflown()); 2313 } 2314 }; 2315 2316 // Implementation of AbstractRefProcTaskExecutor for parallel 2317 // reference processing at the end of G1 concurrent marking 2318 2319 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2320 private: 2321 G1CollectedHeap* _g1h; 2322 ConcurrentMark* _cm; 2323 WorkGang* _workers; 2324 int _active_workers; 2325 2326 public: 2327 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2328 ConcurrentMark* cm, 2329 WorkGang* workers, 2330 int n_workers) : 2331 _g1h(g1h), _cm(cm), 2332 _workers(workers), _active_workers(n_workers) { } 2333 2334 // Executes the given task using concurrent marking worker threads.
2335 virtual void execute(ProcessTask& task); 2336 virtual void execute(EnqueueTask& task); 2337 }; 2338 2339 class G1CMRefProcTaskProxy: public AbstractGangTask { 2340 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2341 ProcessTask& _proc_task; 2342 G1CollectedHeap* _g1h; 2343 ConcurrentMark* _cm; 2344 2345 public: 2346 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2347 G1CollectedHeap* g1h, 2348 ConcurrentMark* cm) : 2349 AbstractGangTask("Process reference objects in parallel"), 2350 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2351 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2352 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2353 } 2354 2355 virtual void work(uint worker_id) { 2356 ResourceMark rm; 2357 HandleMark hm; 2358 CMTask* task = _cm->task(worker_id); 2359 G1CMIsAliveClosure g1_is_alive(_g1h); 2360 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2361 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2362 2363 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2364 } 2365 }; 2366 2367 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2368 assert(_workers != NULL, "Need parallel worker threads."); 2369 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2370 2371 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2372 2373 // We need to reset the concurrency level before each 2374 // proxy task execution, so that the termination protocol 2375 // and overflow handling in CMTask::do_marking_step() knows 2376 // how many workers to wait for. 2377 _cm->set_concurrency(_active_workers); 2378 _g1h->set_par_threads(_active_workers); 2379 _workers->run_task(&proc_task_proxy); 2380 _g1h->set_par_threads(0); 2381 } 2382 2383 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2384 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2385 EnqueueTask& _enq_task; 2386 2387 public: 2388 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2389 AbstractGangTask("Enqueue reference objects in parallel"), 2390 _enq_task(enq_task) { } 2391 2392 virtual void work(uint worker_id) { 2393 _enq_task.work(worker_id); 2394 } 2395 }; 2396 2397 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2398 assert(_workers != NULL, "Need parallel worker threads."); 2399 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2400 2401 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2402 2403 // Not strictly necessary but... 2404 // 2405 // We need to reset the concurrency level before each 2406 // proxy task execution, so that the termination protocol 2407 // and overflow handling in CMTask::do_marking_step() knows 2408 // how many workers to wait for. 2409 _cm->set_concurrency(_active_workers); 2410 _g1h->set_par_threads(_active_workers); 2411 _workers->run_task(&enq_task_proxy); 2412 _g1h->set_par_threads(0); 2413 } 2414 2415 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2416 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2417 } 2418 2419 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2420 if (has_overflown()) { 2421 // Skip processing the discovered references if we have 2422 // overflown the global marking stack. Reference objects 2423 // only get discovered once so it is OK to not 2424 // de-populate the discovered reference lists. 
We could have, 2425 // but the only benefit would be that, when marking restarts, 2426 // fewer reference objects are discovered. 2427 return; 2428 } 2429 2430 ResourceMark rm; 2431 HandleMark hm; 2432 2433 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2434 2435 // Is alive closure. 2436 G1CMIsAliveClosure g1_is_alive(g1h); 2437 2438 // Inner scope to exclude the cleaning of the string and symbol 2439 // tables from the displayed time. 2440 { 2441 G1CMTraceTime t("GC ref-proc", G1Log::finer()); 2442 2443 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2444 2445 // See the comment in G1CollectedHeap::ref_processing_init() 2446 // about how reference processing currently works in G1. 2447 2448 // Set the soft reference policy 2449 rp->setup_policy(clear_all_soft_refs); 2450 assert(_markStack.isEmpty(), "mark stack should be empty"); 2451 2452 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2453 // in serial reference processing. Note these closures are also 2454 // used for serially processing (by the current thread) the 2455 // JNI references during parallel reference processing. 2456 // 2457 // These closures do not need to synchronize with the worker 2458 // threads involved in parallel reference processing as these 2459 // instances are executed serially by the current thread (i.e. 2460 // reference processing is not multi-threaded and is thus 2461 // performed by the current thread instead of a gang worker). 2462 // 2463 // The gang tasks involved in parallel reference processing create 2464 // their own instances of these closures, which do their own 2465 // synchronization among themselves. 2466 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2467 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2468 2469 // We need at least one active thread. If reference processing 2470 // is not multi-threaded we use the current (VMThread) thread, 2471 // otherwise we use the work gang from the G1CollectedHeap and 2472 // we utilize all the worker threads we can. 2473 bool processing_is_mt = rp->processing_is_mt(); 2474 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2475 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2476 2477 // Parallel processing task executor. 2478 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2479 g1h->workers(), active_workers); 2480 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2481 2482 // Set the concurrency level. The phase was already set prior to 2483 // executing the remark task. 2484 set_concurrency(active_workers); 2485 2486 // Set the degree of MT processing here. If the discovery was done MT, 2487 // the number of threads involved during discovery could differ from 2488 // the number of active workers. This is OK as long as the discovered 2489 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2490 rp->set_active_mt_degree(active_workers); 2491 2492 // Process the weak references. 2493 const ReferenceProcessorStats& stats = 2494 rp->process_discovered_references(&g1_is_alive, 2495 &g1_keep_alive, 2496 &g1_drain_mark_stack, 2497 executor, 2498 g1h->gc_timer_cm(), 2499 concurrent_gc_id()); 2500 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2501 2502 // The do_oop work routines of the keep_alive and drain_marking_stack 2503 // oop closures will set the has_overflown flag if we overflow the 2504 // global marking stack.
2505 2506 assert(_markStack.overflow() || _markStack.isEmpty(), 2507 "mark stack should be empty (unless it overflowed)"); 2508 2509 if (_markStack.overflow()) { 2510 // This should have been done already when we tried to push an 2511 // entry on to the global mark stack. But let's do it again. 2512 set_has_overflown(); 2513 } 2514 2515 assert(rp->num_q() == active_workers, "why not"); 2516 2517 rp->enqueue_discovered_references(executor); 2518 2519 rp->verify_no_references_recorded(); 2520 assert(!rp->discovery_enabled(), "Post condition"); 2521 } 2522 2523 if (has_overflown()) { 2524 // We cannot trust g1_is_alive if the marking stack overflowed 2525 return; 2526 } 2527 2528 assert(_markStack.isEmpty(), "Marking should have completed"); 2529 2530 // Unload Klasses, String, Symbols, Code Cache, etc. 2531 { 2532 G1CMTraceTime trace("Unloading", G1Log::finer()); 2533 2534 if (ClassUnloadingWithConcurrentMark) { 2535 bool purged_classes; 2536 2537 { 2538 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2539 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2540 } 2541 2542 { 2543 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2544 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2545 } 2546 } 2547 2548 if (G1StringDedup::is_enabled()) { 2549 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2550 G1StringDedup::unlink(&g1_is_alive); 2551 } 2552 } 2553 } 2554 2555 void ConcurrentMark::swapMarkBitMaps() { 2556 CMBitMapRO* temp = _prevMarkBitMap; 2557 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2558 _nextMarkBitMap = (CMBitMap*) temp; 2559 } 2560 2561 class CMObjectClosure; 2562 2563 // Closure for iterating over objects, currently only used for 2564 // processing SATB buffers. 2565 class CMObjectClosure : public ObjectClosure { 2566 private: 2567 CMTask* _task; 2568 2569 public: 2570 void do_object(oop obj) { 2571 _task->deal_with_reference(obj); 2572 } 2573 2574 CMObjectClosure(CMTask* task) : _task(task) { } 2575 }; 2576 2577 class G1RemarkThreadsClosure : public ThreadClosure { 2578 CMObjectClosure _cm_obj; 2579 G1CMOopClosure _cm_cl; 2580 MarkingCodeBlobClosure _code_cl; 2581 int _thread_parity; 2582 2583 public: 2584 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) : 2585 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 2586 _thread_parity(Threads::thread_claim_parity()) {} 2587 2588 void do_thread(Thread* thread) { 2589 if (thread->is_Java_thread()) { 2590 if (thread->claim_oops_do(true, _thread_parity)) { 2591 JavaThread* jt = (JavaThread*)thread; 2592 2593 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking 2594 // however, oops reachable from nmethods have very complex lifecycles: 2595 // * Alive if on the stack of an executing method 2596 // * Weakly reachable otherwise 2597 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 2598 // live by the SATB invariant but other oops recorded in nmethods may behave differently.
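// Hence we conservatively walk the nmethods that have activations on
// this thread's stack and treat the oops they reference as live.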
jt->nmethods_do(&_code_cl); 2600 2601 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj); 2602 } 2603 } else if (thread->is_VM_thread()) { 2604 if (thread->claim_oops_do(true, _thread_parity)) { 2605 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj); 2606 } 2607 } 2608 } 2609 }; 2610 2611 class CMRemarkTask: public AbstractGangTask { 2612 private: 2613 ConcurrentMark* _cm; 2614 public: 2615 void work(uint worker_id) { 2616 // Since all available tasks are actually started, we should 2617 // only proceed if we're supposed to be active. 2618 if (worker_id < _cm->active_tasks()) { 2619 CMTask* task = _cm->task(worker_id); 2620 task->record_start_time(); 2621 { 2622 ResourceMark rm; 2623 HandleMark hm; 2624 2625 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 2626 Threads::threads_do(&threads_f); 2627 } 2628 2629 do { 2630 task->do_marking_step(1000000000.0 /* something very large */, 2631 true /* do_termination */, 2632 false /* is_serial */); 2633 } while (task->has_aborted() && !_cm->has_overflown()); 2634 // If we overflow, then we do not want to restart. We instead 2635 // want to abort remark and do concurrent marking again. 2636 task->record_end_time(); 2637 } 2638 } 2639 2640 CMRemarkTask(ConcurrentMark* cm, int active_workers) : 2641 AbstractGangTask("Par Remark"), _cm(cm) { 2642 _cm->terminator()->reset_for_reuse(active_workers); 2643 } 2644 }; 2645 2646 void ConcurrentMark::checkpointRootsFinalWork() { 2647 ResourceMark rm; 2648 HandleMark hm; 2649 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2650 2651 G1CMTraceTime trace("Finalize Marking", G1Log::finer()); 2652 2653 g1h->ensure_parsability(false); 2654 2655 StrongRootsScope srs; 2656 // this is remark, so we'll use up all active threads 2657 uint active_workers = g1h->workers()->active_workers(); 2658 if (active_workers == 0) { 2659 assert(active_workers > 0, "Should have been set earlier"); 2660 active_workers = (uint) ParallelGCThreads; 2661 g1h->workers()->set_active_workers(active_workers); 2662 } 2663 set_concurrency_and_phase(active_workers, false /* concurrent */); 2664 // Leave _parallel_marking_threads at its 2665 // value originally calculated in the ConcurrentMark 2666 // constructor and pass values of the active workers 2667 // through the gang in the task. 2668 2669 CMRemarkTask remarkTask(this, active_workers); 2670 // We will start all available threads, even if we decide that the 2671 // active_workers will be fewer. The extra ones will just bail out 2672 // immediately.
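// (The bail-out is the worker_id < _cm->active_tasks() check at the
// top of CMRemarkTask::work() above.)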
2673 g1h->set_par_threads(active_workers); 2674 g1h->workers()->run_task(&remarkTask); 2675 g1h->set_par_threads(0); 2676 2677 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2678 guarantee(has_overflown() || 2679 satb_mq_set.completed_buffers_num() == 0, 2680 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2681 BOOL_TO_STR(has_overflown()), 2682 satb_mq_set.completed_buffers_num())); 2683 2684 print_stats(); 2685 } 2686 2687 #ifndef PRODUCT 2688 2689 class PrintReachableOopClosure: public OopClosure { 2690 private: 2691 G1CollectedHeap* _g1h; 2692 outputStream* _out; 2693 VerifyOption _vo; 2694 bool _all; 2695 2696 public: 2697 PrintReachableOopClosure(outputStream* out, 2698 VerifyOption vo, 2699 bool all) : 2700 _g1h(G1CollectedHeap::heap()), 2701 _out(out), _vo(vo), _all(all) { } 2702 2703 void do_oop(narrowOop* p) { do_oop_work(p); } 2704 void do_oop( oop* p) { do_oop_work(p); } 2705 2706 template <class T> void do_oop_work(T* p) { 2707 oop obj = oopDesc::load_decode_heap_oop(p); 2708 const char* str = NULL; 2709 const char* str2 = ""; 2710 2711 if (obj == NULL) { 2712 str = ""; 2713 } else if (!_g1h->is_in_g1_reserved(obj)) { 2714 str = " O"; 2715 } else { 2716 HeapRegion* hr = _g1h->heap_region_containing(obj); 2717 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2718 bool marked = _g1h->is_marked(obj, _vo); 2719 2720 if (over_tams) { 2721 str = " >"; 2722 if (marked) { 2723 str2 = " AND MARKED"; 2724 } 2725 } else if (marked) { 2726 str = " M"; 2727 } else { 2728 str = " NOT"; 2729 } 2730 } 2731 2732 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2733 p2i(p), p2i((void*) obj), str, str2); 2734 } 2735 }; 2736 2737 class PrintReachableObjectClosure : public ObjectClosure { 2738 private: 2739 G1CollectedHeap* _g1h; 2740 outputStream* _out; 2741 VerifyOption _vo; 2742 bool _all; 2743 HeapRegion* _hr; 2744 2745 public: 2746 PrintReachableObjectClosure(outputStream* out, 2747 VerifyOption vo, 2748 bool all, 2749 HeapRegion* hr) : 2750 _g1h(G1CollectedHeap::heap()), 2751 _out(out), _vo(vo), _all(all), _hr(hr) { } 2752 2753 void do_object(oop o) { 2754 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2755 bool marked = _g1h->is_marked(o, _vo); 2756 bool print_it = _all || over_tams || marked; 2757 2758 if (print_it) { 2759 _out->print_cr(" "PTR_FORMAT"%s", 2760 p2i((void *)o), (over_tams) ? " >" : (marked) ? 
" M" : ""); 2761 PrintReachableOopClosure oopCl(_out, _vo, _all); 2762 o->oop_iterate_no_header(&oopCl); 2763 } 2764 } 2765 }; 2766 2767 class PrintReachableRegionClosure : public HeapRegionClosure { 2768 private: 2769 G1CollectedHeap* _g1h; 2770 outputStream* _out; 2771 VerifyOption _vo; 2772 bool _all; 2773 2774 public: 2775 bool doHeapRegion(HeapRegion* hr) { 2776 HeapWord* b = hr->bottom(); 2777 HeapWord* e = hr->end(); 2778 HeapWord* t = hr->top(); 2779 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2780 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2781 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p)); 2782 _out->cr(); 2783 2784 HeapWord* from = b; 2785 HeapWord* to = t; 2786 2787 if (to > from) { 2788 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to)); 2789 _out->cr(); 2790 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2791 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2792 _out->cr(); 2793 } 2794 2795 return false; 2796 } 2797 2798 PrintReachableRegionClosure(outputStream* out, 2799 VerifyOption vo, 2800 bool all) : 2801 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2802 }; 2803 2804 void ConcurrentMark::print_reachable(const char* str, 2805 VerifyOption vo, 2806 bool all) { 2807 gclog_or_tty->cr(); 2808 gclog_or_tty->print_cr("== Doing heap dump... "); 2809 2810 if (G1PrintReachableBaseFile == NULL) { 2811 gclog_or_tty->print_cr(" #### error: no base file defined"); 2812 return; 2813 } 2814 2815 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2816 (JVM_MAXPATHLEN - 1)) { 2817 gclog_or_tty->print_cr(" #### error: file name too long"); 2818 return; 2819 } 2820 2821 char file_name[JVM_MAXPATHLEN]; 2822 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2823 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2824 2825 fileStream fout(file_name); 2826 if (!fout.is_open()) { 2827 gclog_or_tty->print_cr(" #### error: could not open file"); 2828 return; 2829 } 2830 2831 outputStream* out = &fout; 2832 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2833 out->cr(); 2834 2835 out->print_cr("--- ITERATING OVER REGIONS"); 2836 out->cr(); 2837 PrintReachableRegionClosure rcl(out, vo, all); 2838 _g1h->heap_region_iterate(&rcl); 2839 out->cr(); 2840 2841 gclog_or_tty->print_cr(" done"); 2842 gclog_or_tty->flush(); 2843 } 2844 2845 #endif // PRODUCT 2846 2847 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2848 // Note we are overriding the read-only view of the prev map here, via 2849 // the cast. 2850 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2851 } 2852 2853 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2854 _nextMarkBitMap->clearRange(mr); 2855 } 2856 2857 HeapRegion* 2858 ConcurrentMark::claim_region(uint worker_id) { 2859 // "checkpoint" the finger 2860 HeapWord* finger = _finger; 2861 2862 // _heap_end will not change underneath our feet; it only changes at 2863 // yield points. 2864 while (finger < _heap_end) { 2865 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2866 2867 // Note on how this code handles humongous regions. In the 2868 // normal case the finger will reach the start of a "starts 2869 // humongous" (SH) region. Its end will either be the end of the 2870 // last "continues humongous" (CH) region in the sequence, or the 2871 // standard end of the SH region (if the SH is the only region in 2872 // the sequence). That way claim_region() will skip over the CH 2873 // regions. 
However, there is a subtle race between a CM thread 2874 // executing this method and a mutator thread doing a humongous 2875 // object allocation. The two are not mutually exclusive as the CM 2876 // thread does not need to hold the Heap_lock when it gets 2877 // here. So there is a chance that claim_region() will come across 2878 // a free region that's in the process of becoming a SH or a CH 2879 // region. In the former case, it will either 2880 // a) Miss the update to the region's end, in which case it will 2881 // visit every subsequent CH region, will find their bitmaps 2882 // empty, and do nothing, or 2883 // b) Will observe the update of the region's end (in which case 2884 // it will skip the subsequent CH regions). 2885 // If it comes across a region that suddenly becomes CH, the 2886 // scenario will be similar to b). So, the race between 2887 // claim_region() and a humongous object allocation might force us 2888 // to do a bit of unnecessary work (due to some unnecessary bitmap 2889 // iterations) but it should not introduce any correctness issues. 2890 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2891 2892 // Above heap_region_containing_raw may return NULL as we always scan and claim 2893 // until the end of the heap. In this case, just jump to the next region. 2894 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 2895 2896 // Is the gap between reading the finger and doing the CAS too long? 2897 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2898 if (res == finger && curr_region != NULL) { 2899 // we succeeded 2900 HeapWord* bottom = curr_region->bottom(); 2901 HeapWord* limit = curr_region->next_top_at_mark_start(); 2902 2903 if (verbose_low()) { 2904 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" " 2905 "["PTR_FORMAT", "PTR_FORMAT"), " 2906 "limit = "PTR_FORMAT, 2907 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit)); 2908 } 2909 2910 // notice that _finger == end cannot be guaranteed here since 2911 // someone else might have moved the finger even further 2912 assert(_finger >= end, "the finger should have moved forward"); 2913 2914 if (verbose_low()) { 2915 gclog_or_tty->print_cr("[%u] we were successful with region = " 2916 PTR_FORMAT, worker_id, p2i(curr_region)); 2917 } 2918 2919 if (limit > bottom) { 2920 if (verbose_low()) { 2921 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, " 2922 "returning it ", worker_id, p2i(curr_region)); 2923 } 2924 return curr_region; 2925 } else { 2926 assert(limit == bottom, 2927 "the region limit should be at bottom"); 2928 if (verbose_low()) { 2929 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, " 2930 "returning NULL", worker_id, p2i(curr_region)); 2931 } 2932 // we return NULL and the caller should try calling 2933 // claim_region() again.
2934 return NULL; 2935 } 2936 } else { 2937 assert(_finger > finger, "the finger should have moved forward"); 2938 if (verbose_low()) { 2939 if (curr_region == NULL) { 2940 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, " 2941 "global finger = "PTR_FORMAT", " 2942 "our finger = "PTR_FORMAT, 2943 worker_id, p2i(_finger), p2i(finger)); 2944 } else { 2945 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2946 "global finger = "PTR_FORMAT", " 2947 "our finger = "PTR_FORMAT, 2948 worker_id, p2i(_finger), p2i(finger)); 2949 } 2950 } 2951 2952 // read it again 2953 finger = _finger; 2954 } 2955 } 2956 2957 return NULL; 2958 } 2959 2960 #ifndef PRODUCT 2961 enum VerifyNoCSetOopsPhase { 2962 VerifyNoCSetOopsStack, 2963 VerifyNoCSetOopsQueues, 2964 VerifyNoCSetOopsSATBCompleted, 2965 VerifyNoCSetOopsSATBThread 2966 }; 2967 2968 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2969 private: 2970 G1CollectedHeap* _g1h; 2971 VerifyNoCSetOopsPhase _phase; 2972 int _info; 2973 2974 const char* phase_str() { 2975 switch (_phase) { 2976 case VerifyNoCSetOopsStack: return "Stack"; 2977 case VerifyNoCSetOopsQueues: return "Queue"; 2978 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2979 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2980 default: ShouldNotReachHere(); 2981 } 2982 return NULL; 2983 } 2984 2985 void do_object_work(oop obj) { 2986 guarantee(!_g1h->obj_in_cs(obj), 2987 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2988 p2i((void*) obj), phase_str(), _info)); 2989 } 2990 2991 public: 2992 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2993 2994 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2995 _phase = phase; 2996 _info = info; 2997 } 2998 2999 virtual void do_oop(oop* p) { 3000 oop obj = oopDesc::load_decode_heap_oop(p); 3001 do_object_work(obj); 3002 } 3003 3004 virtual void do_oop(narrowOop* p) { 3005 // We should not come across narrow oops while scanning marking 3006 // stacks and SATB buffers. 
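// (The mark stack, the task queues and the SATB buffers all hold
// full-width oop entries, so a narrowOop slot should never be
// presented to this closure.)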
3007 ShouldNotReachHere(); 3008 } 3009 3010 virtual void do_object(oop obj) { 3011 do_object_work(obj); 3012 } 3013 }; 3014 3015 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 3016 bool verify_enqueued_buffers, 3017 bool verify_thread_buffers, 3018 bool verify_fingers) { 3019 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 3020 if (!G1CollectedHeap::heap()->mark_in_progress()) { 3021 return; 3022 } 3023 3024 VerifyNoCSetOopsClosure cl; 3025 3026 if (verify_stacks) { 3027 // Verify entries on the global mark stack 3028 cl.set_phase(VerifyNoCSetOopsStack); 3029 _markStack.oops_do(&cl); 3030 3031 // Verify entries on the task queues 3032 for (uint i = 0; i < _max_worker_id; i += 1) { 3033 cl.set_phase(VerifyNoCSetOopsQueues, i); 3034 CMTaskQueue* queue = _task_queues->queue(i); 3035 queue->oops_do(&cl); 3036 } 3037 } 3038 3039 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 3040 3041 // Verify entries on the enqueued SATB buffers 3042 if (verify_enqueued_buffers) { 3043 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 3044 satb_qs.iterate_completed_buffers_read_only(&cl); 3045 } 3046 3047 // Verify entries on the per-thread SATB buffers 3048 if (verify_thread_buffers) { 3049 cl.set_phase(VerifyNoCSetOopsSATBThread); 3050 satb_qs.iterate_thread_buffers_read_only(&cl); 3051 } 3052 3053 if (verify_fingers) { 3054 // Verify the global finger 3055 HeapWord* global_finger = finger(); 3056 if (global_finger != NULL && global_finger < _heap_end) { 3057 // The global finger always points to a heap region boundary. We 3058 // use heap_region_containing_raw() to get the containing region 3059 // given that the global finger could be pointing to a free region 3060 // which subsequently becomes continues humongous. If that 3061 // happens, heap_region_containing() will return the bottom of the 3062 // corresponding starts humongous region and the check below will 3063 // not hold any more. 3064 // Since we always iterate over all regions, we might get a NULL HeapRegion 3065 // here. 3066 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3067 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 3068 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3069 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 3070 } 3071 3072 // Verify the task fingers 3073 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3074 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3075 CMTask* task = _tasks[i]; 3076 HeapWord* task_finger = task->finger(); 3077 if (task_finger != NULL && task_finger < _heap_end) { 3078 // See above note on the global finger verification. 3079 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3080 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 3081 !task_hr->in_collection_set(), 3082 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3083 p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); 3084 } 3085 } 3086 } 3087 } 3088 #endif // PRODUCT 3089 3090 // Aggregate the counting data that was constructed concurrently 3091 // with marking. 
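// Conceptually, for each region r whose cards are [start_idx, limit_idx):
//
//   marked_bytes(r) = sum, over all workers w, of
//                       count_marked_bytes_array_for(w)[r]
//   global card bm  |= union, over all workers w, of
//                       count_card_bitmap_for(w) restricted to
//                       [start_idx, limit_idx)
//
// as implemented by the closure below.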
3092 class AggregateCountDataHRClosure: public HeapRegionClosure { 3093 G1CollectedHeap* _g1h; 3094 ConcurrentMark* _cm; 3095 CardTableModRefBS* _ct_bs; 3096 BitMap* _cm_card_bm; 3097 uint _max_worker_id; 3098 3099 public: 3100 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3101 BitMap* cm_card_bm, 3102 uint max_worker_id) : 3103 _g1h(g1h), _cm(g1h->concurrent_mark()), 3104 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())), 3105 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3106 3107 bool doHeapRegion(HeapRegion* hr) { 3108 if (hr->is_continues_humongous()) { 3109 // We will ignore these here and process them when their 3110 // associated "starts humongous" region is processed. 3111 // Note that we cannot rely on their associated 3112 // "starts humongous" region to have their bit set to 1 3113 // since, due to the region chunking in the parallel region 3114 // iteration, a "continues humongous" region might be visited 3115 // before its associated "starts humongous". 3116 return false; 3117 } 3118 3119 HeapWord* start = hr->bottom(); 3120 HeapWord* limit = hr->next_top_at_mark_start(); 3121 HeapWord* end = hr->end(); 3122 3123 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3124 err_msg("Preconditions not met - " 3125 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3126 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3127 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()))); 3128 3129 assert(hr->next_marked_bytes() == 0, "Precondition"); 3130 3131 if (start == limit) { 3132 // NTAMS of this region has not been set so nothing to do. 3133 return false; 3134 } 3135 3136 // 'start' should be in the heap. 3137 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 3138 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 3139 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 3140 3141 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 3142 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 3143 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 3144 3145 // If ntams is not card aligned then we bump card bitmap index 3146 // for limit so that we get all the cards spanned by 3147 // the object ending at ntams. 3148 // Note: if this is the last region in the heap then ntams 3149 // could be actually just beyond the end of the heap; 3150 // limit_idx will then correspond to a (non-existent) card 3151 // that is also outside the heap. 3152 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 3153 limit_idx += 1; 3154 } 3155 3156 assert(limit_idx <= end_idx, "or else use atomics"); 3157 3158 // Aggregate the "stripe" in the count data associated with hr. 3159 uint hrm_index = hr->hrm_index(); 3160 size_t marked_bytes = 0; 3161 3162 for (uint i = 0; i < _max_worker_id; i += 1) { 3163 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 3164 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 3165 3166 // Fetch the marked_bytes in this region for task i and 3167 // add it to the running total for this region. 3168 marked_bytes += marked_bytes_array[hrm_index]; 3169 3170 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 3171 // into the global card bitmap.
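// For example, if this task's bitmap has exactly bits {5, 9} set
// within [start_idx, limit_idx), the loop below copies bits 5 and 9
// into the global bitmap and exits once get_next_one_offset()
// returns limit_idx.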
3172 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3173 3174 while (scan_idx < limit_idx) { 3175 assert(task_card_bm->at(scan_idx) == true, "should be"); 3176 _cm_card_bm->set_bit(scan_idx); 3177 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3178 3179 // BitMap::get_next_one_offset() can handle the case when 3180 // its left_offset parameter is greater than its right_offset 3181 // parameter. It does, however, have an early exit if 3182 // left_offset == right_offset. So let's limit the value 3183 // passed in for left offset here. 3184 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 3185 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 3186 } 3187 } 3188 3189 // Update the marked bytes for this region. 3190 hr->add_to_marked_bytes(marked_bytes); 3191 3192 // Next heap region 3193 return false; 3194 } 3195 }; 3196 3197 class G1AggregateCountDataTask: public AbstractGangTask { 3198 protected: 3199 G1CollectedHeap* _g1h; 3200 ConcurrentMark* _cm; 3201 BitMap* _cm_card_bm; 3202 uint _max_worker_id; 3203 int _active_workers; 3204 HeapRegionClaimer _hrclaimer; 3205 3206 public: 3207 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3208 ConcurrentMark* cm, 3209 BitMap* cm_card_bm, 3210 uint max_worker_id, 3211 int n_workers) : 3212 AbstractGangTask("Count Aggregation"), 3213 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3214 _max_worker_id(max_worker_id), 3215 _active_workers(n_workers), 3216 _hrclaimer(_active_workers) { 3217 } 3218 3219 void work(uint worker_id) { 3220 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3221 3222 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 3223 } 3224 }; 3225 3226 3227 void ConcurrentMark::aggregate_count_data() { 3228 int n_workers = _g1h->workers()->active_workers(); 3229 3230 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3231 _max_worker_id, n_workers); 3232 3233 _g1h->set_par_threads(n_workers); 3234 _g1h->workers()->run_task(&g1_par_agg_task); 3235 _g1h->set_par_threads(0); 3236 } 3237 3238 // Clear the per-worker arrays used to store the per-region counting data 3239 void ConcurrentMark::clear_all_count_data() { 3240 // Clear the global card bitmap - it will be filled during 3241 // liveness count aggregation (during remark) and the 3242 // final counting task. 3243 _card_bm.clear(); 3244 3245 // Clear the global region bitmap - it will be filled as part 3246 // of the final counting task. 
3247 _region_bm.clear(); 3248 3249 uint max_regions = _g1h->max_regions(); 3250 assert(_max_worker_id > 0, "uninitialized"); 3251 3252 for (uint i = 0; i < _max_worker_id; i += 1) { 3253 BitMap* task_card_bm = count_card_bitmap_for(i); 3254 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3255 3256 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3257 assert(marked_bytes_array != NULL, "uninitialized"); 3258 3259 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3260 task_card_bm->clear(); 3261 } 3262 } 3263 3264 void ConcurrentMark::print_stats() { 3265 if (verbose_stats()) { 3266 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3267 for (size_t i = 0; i < _active_tasks; ++i) { 3268 _tasks[i]->print_stats(); 3269 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3270 } 3271 } 3272 } 3273 3274 // abandon current marking iteration due to a Full GC 3275 void ConcurrentMark::abort() { 3276 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 3277 // concurrent bitmap clearing. 3278 _nextMarkBitMap->clearAll(); 3279 3280 // Note we cannot clear the previous marking bitmap here 3281 // since VerifyDuringGC verifies the objects marked during 3282 // a full GC against the previous bitmap. 3283 3284 // Clear the liveness counting data 3285 clear_all_count_data(); 3286 // Empty mark stack 3287 reset_marking_state(); 3288 for (uint i = 0; i < _max_worker_id; ++i) { 3289 _tasks[i]->clear_region_fields(); 3290 } 3291 _first_overflow_barrier_sync.abort(); 3292 _second_overflow_barrier_sync.abort(); 3293 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id(); 3294 if (!gc_id.is_undefined()) { 3295 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance 3296 // to detect that it was aborted. Only keep track of the first GC id that we aborted. 3297 _aborted_gc_id = gc_id; 3298 } 3299 _has_aborted = true; 3300 3301 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3302 satb_mq_set.abandon_partial_marking(); 3303 // This can be called either during or outside marking; we'll read 3304 // the expected_active value from the SATB queue set. 3305 satb_mq_set.set_active_all_threads( 3306 false, /* new active value */ 3307 satb_mq_set.is_active() /* expected_active */); 3308 3309 _g1h->trace_heap_after_concurrent_cycle(); 3310 _g1h->register_concurrent_cycle_end(); 3311 } 3312 3313 const GCId& ConcurrentMark::concurrent_gc_id() { 3314 if (has_aborted()) { 3315 return _aborted_gc_id; 3316 } 3317 return _g1h->gc_tracer_cm()->gc_id(); 3318 } 3319 3320 static void print_ms_time_info(const char* prefix, const char* name, 3321 NumberSeq& ns) { 3322 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3323 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3324 if (ns.num() > 0) { 3325 gclog_or_tty->print_cr("%s [std.
dev = %8.2f ms, max = %8.2f ms]", 3326 prefix, ns.sd(), ns.maximum()); 3327 } 3328 } 3329 3330 void ConcurrentMark::print_summary_info() { 3331 gclog_or_tty->print_cr(" Concurrent marking:"); 3332 print_ms_time_info(" ", "init marks", _init_times); 3333 print_ms_time_info(" ", "remarks", _remark_times); 3334 { 3335 print_ms_time_info(" ", "final marks", _remark_mark_times); 3336 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3337 3338 } 3339 print_ms_time_info(" ", "cleanups", _cleanup_times); 3340 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3341 _total_counting_time, 3342 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3343 (double)_cleanup_times.num() 3344 : 0.0)); 3345 if (G1ScrubRemSets) { 3346 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3347 _total_rs_scrub_time, 3348 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3349 (double)_cleanup_times.num() 3350 : 0.0)); 3351 } 3352 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3353 (_init_times.sum() + _remark_times.sum() + 3354 _cleanup_times.sum())/1000.0); 3355 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3356 "(%8.2f s marking).", 3357 cmThread()->vtime_accum(), 3358 cmThread()->vtime_mark_accum()); 3359 } 3360 3361 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3362 _parallel_workers->print_worker_threads_on(st); 3363 } 3364 3365 void ConcurrentMark::print_on_error(outputStream* st) const { 3366 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3367 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3368 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3369 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3370 } 3371 3372 // We take a break if someone is trying to stop the world. 
3373 bool ConcurrentMark::do_yield_check(uint worker_id) { 3374 if (SuspendibleThreadSet::should_yield()) { 3375 if (worker_id == 0) { 3376 _g1h->g1_policy()->record_concurrent_pause(); 3377 } 3378 SuspendibleThreadSet::yield(); 3379 return true; 3380 } else { 3381 return false; 3382 } 3383 } 3384 3385 #ifndef PRODUCT 3386 // for debugging purposes 3387 void ConcurrentMark::print_finger() { 3388 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3389 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3390 for (uint i = 0; i < _max_worker_id; ++i) { 3391 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3392 } 3393 gclog_or_tty->cr(); 3394 } 3395 #endif 3396 3397 void CMTask::scan_object(oop obj) { 3398 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3399 3400 if (_cm->verbose_high()) { 3401 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3402 _worker_id, p2i((void*) obj)); 3403 } 3404 3405 size_t obj_size = obj->size(); 3406 _words_scanned += obj_size; 3407 3408 obj->oop_iterate(_cm_oop_closure); 3409 statsOnly( ++_objs_scanned ); 3410 check_limits(); 3411 } 3412 3413 // Closure for iteration over bitmaps 3414 class CMBitMapClosure : public BitMapClosure { 3415 private: 3416 // the bitmap that is being iterated over 3417 CMBitMap* _nextMarkBitMap; 3418 ConcurrentMark* _cm; 3419 CMTask* _task; 3420 3421 public: 3422 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3423 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3424 3425 bool do_bit(size_t offset) { 3426 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3427 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3428 assert( addr < _cm->finger(), "invariant"); 3429 3430 statsOnly( _task->increase_objs_found_on_bitmap() ); 3431 assert(addr >= _task->finger(), "invariant"); 3432 3433 // We move that task's local finger along. 
3434 _task->move_finger_to(addr); 3435 3436 _task->scan_object(oop(addr)); 3437 // we only partially drain the local queue and global stack 3438 _task->drain_local_queue(true); 3439 _task->drain_global_stack(true); 3440 3441 // if the has_aborted flag has been raised, we need to bail out of 3442 // the iteration 3443 return !_task->has_aborted(); 3444 } 3445 }; 3446 3447 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3448 ConcurrentMark* cm, 3449 CMTask* task) 3450 : _g1h(g1h), _cm(cm), _task(task) { 3451 assert(_ref_processor == NULL, "should be initialized to NULL"); 3452 3453 if (G1UseConcMarkReferenceProcessing) { 3454 _ref_processor = g1h->ref_processor_cm(); 3455 assert(_ref_processor != NULL, "should not be NULL"); 3456 } 3457 } 3458 3459 void CMTask::setup_for_region(HeapRegion* hr) { 3460 assert(hr != NULL, 3461 "claim_region() should have filtered out NULL regions"); 3462 assert(!hr->is_continues_humongous(), 3463 "claim_region() should have filtered out continues humongous regions"); 3464 3465 if (_cm->verbose_low()) { 3466 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT, 3467 _worker_id, p2i(hr)); 3468 } 3469 3470 _curr_region = hr; 3471 _finger = hr->bottom(); 3472 update_region_limit(); 3473 } 3474 3475 void CMTask::update_region_limit() { 3476 HeapRegion* hr = _curr_region; 3477 HeapWord* bottom = hr->bottom(); 3478 HeapWord* limit = hr->next_top_at_mark_start(); 3479 3480 if (limit == bottom) { 3481 if (_cm->verbose_low()) { 3482 gclog_or_tty->print_cr("[%u] found an empty region " 3483 "["PTR_FORMAT", "PTR_FORMAT")", 3484 _worker_id, p2i(bottom), p2i(limit)); 3485 } 3486 // The region was collected underneath our feet. 3487 // We set the finger to bottom to ensure that the bitmap 3488 // iteration that will follow this will not do anything. 3489 // (this is not a condition that holds when we set the region up, 3490 // as the region is not supposed to be empty in the first place) 3491 _finger = bottom; 3492 } else if (limit >= _region_limit) { 3493 assert(limit >= _finger, "peace of mind"); 3494 } else { 3495 assert(limit < _region_limit, "only way to get here"); 3496 // This can happen under some pretty unusual circumstances. An 3497 // evacuation pause empties the region underneath our feet (NTAMS 3498 // at bottom). We then do some allocation in the region (NTAMS 3499 // stays at bottom), followed by the region being used as a GC 3500 // alloc region (NTAMS will move to top() and the objects 3501 // originally below it will be grayed). All objects now marked in 3502 // the region are explicitly grayed, if below the global finger, 3503 // and in fact we do not need to scan anything else. So, we simply 3504 // set _finger to be limit to ensure that the bitmap iteration 3505 // doesn't do anything. 3506 _finger = limit; 3507 } 3508 3509 _region_limit = limit; 3510 } 3511 3512 void CMTask::giveup_current_region() { 3513 assert(_curr_region != NULL, "invariant"); 3514 if (_cm->verbose_low()) { 3515 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3516 _worker_id, p2i(_curr_region)); 3517 } 3518 clear_region_fields(); 3519 } 3520 3521 void CMTask::clear_region_fields() { 3522 // Values for these three fields that indicate that we're not 3523 // holding on to a region.
3524 _curr_region = NULL; 3525 _finger = NULL; 3526 _region_limit = NULL; 3527 } 3528 3529 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3530 if (cm_oop_closure == NULL) { 3531 assert(_cm_oop_closure != NULL, "invariant"); 3532 } else { 3533 assert(_cm_oop_closure == NULL, "invariant"); 3534 } 3535 _cm_oop_closure = cm_oop_closure; 3536 } 3537 3538 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3539 guarantee(nextMarkBitMap != NULL, "invariant"); 3540 3541 if (_cm->verbose_low()) { 3542 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3543 } 3544 3545 _nextMarkBitMap = nextMarkBitMap; 3546 clear_region_fields(); 3547 3548 _calls = 0; 3549 _elapsed_time_ms = 0.0; 3550 _termination_time_ms = 0.0; 3551 _termination_start_time_ms = 0.0; 3552 3553 #if _MARKING_STATS_ 3554 _aborted = 0; 3555 _aborted_overflow = 0; 3556 _aborted_cm_aborted = 0; 3557 _aborted_yield = 0; 3558 _aborted_timed_out = 0; 3559 _aborted_satb = 0; 3560 _aborted_termination = 0; 3561 _steal_attempts = 0; 3562 _steals = 0; 3563 _local_pushes = 0; 3564 _local_pops = 0; 3565 _local_max_size = 0; 3566 _objs_scanned = 0; 3567 _global_pushes = 0; 3568 _global_pops = 0; 3569 _global_max_size = 0; 3570 _global_transfers_to = 0; 3571 _global_transfers_from = 0; 3572 _regions_claimed = 0; 3573 _objs_found_on_bitmap = 0; 3574 _satb_buffers_processed = 0; 3575 #endif // _MARKING_STATS_ 3576 } 3577 3578 bool CMTask::should_exit_termination() { 3579 regular_clock_call(); 3580 // This is called when we are in the termination protocol. We should 3581 // quit if, for some reason, this task wants to abort or the global 3582 // stack is not empty (this means that we can get work from it). 3583 return !_cm->mark_stack_empty() || has_aborted(); 3584 } 3585 3586 void CMTask::reached_limit() { 3587 assert(_words_scanned >= _words_scanned_limit || 3588 _refs_reached >= _refs_reached_limit , 3589 "shouldn't have been called otherwise"); 3590 regular_clock_call(); 3591 } 3592 3593 void CMTask::regular_clock_call() { 3594 if (has_aborted()) return; 3595 3596 // First, we need to recalculate the words scanned and refs reached 3597 // limits for the next clock call. 3598 recalculate_limits(); 3599 3600 // During the regular clock call we do the following 3601 3602 // (1) If an overflow has been flagged, then we abort. 3603 if (_cm->has_overflown()) { 3604 set_has_aborted(); 3605 return; 3606 } 3607 3608 // If we are not concurrent (i.e. we're doing remark) we don't need 3609 // to check anything else. The other steps are only needed during 3610 // the concurrent marking phase. 3611 if (!concurrent()) return; 3612 3613 // (2) If marking has been aborted for Full GC, then we also abort. 3614 if (_cm->has_aborted()) { 3615 set_has_aborted(); 3616 statsOnly( ++_aborted_cm_aborted ); 3617 return; 3618 } 3619 3620 double curr_time_ms = os::elapsedVTime() * 1000.0; 3621 3622 // (3) If marking stats are enabled, then we update the step history. 
3623 #if _MARKING_STATS_ 3624 if (_words_scanned >= _words_scanned_limit) { 3625 ++_clock_due_to_scanning; 3626 } 3627 if (_refs_reached >= _refs_reached_limit) { 3628 ++_clock_due_to_marking; 3629 } 3630 3631 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 3632 _interval_start_time_ms = curr_time_ms; 3633 _all_clock_intervals_ms.add(last_interval_ms); 3634 3635 if (_cm->verbose_medium()) { 3636 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, " 3637 "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s", 3638 _worker_id, last_interval_ms, 3639 _words_scanned, 3640 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 3641 _refs_reached, 3642 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 3643 } 3644 #endif // _MARKING_STATS_ 3645 3646 // (4) We check whether we should yield. If we have to, then we abort. 3647 if (SuspendibleThreadSet::should_yield()) { 3648 // We should yield. To do this we abort the task. The caller is 3649 // responsible for yielding. 3650 set_has_aborted(); 3651 statsOnly( ++_aborted_yield ); 3652 return; 3653 } 3654 3655 // (5) We check whether we've reached our time quota. If we have, 3656 // then we abort. 3657 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3658 if (elapsed_time_ms > _time_target_ms) { 3659 set_has_aborted(); 3660 _has_timed_out = true; 3661 statsOnly( ++_aborted_timed_out ); 3662 return; 3663 } 3664 3665 // (6) Finally, we check whether there are enough completed SATB 3666 // buffers available for processing. If there are, we abort. 3667 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3668 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3669 if (_cm->verbose_low()) { 3670 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers", 3671 _worker_id); 3672 } 3673 // we do need to process SATB buffers, so we'll abort and restart 3674 // the marking task to do so 3675 set_has_aborted(); 3676 statsOnly( ++_aborted_satb ); 3677 return; 3678 } 3679 } 3680 3681 void CMTask::recalculate_limits() { 3682 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3683 _words_scanned_limit = _real_words_scanned_limit; 3684 3685 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3686 _refs_reached_limit = _real_refs_reached_limit; 3687 } 3688 3689 void CMTask::decrease_limits() { 3690 // This is called when we believe that we're going to do an infrequent 3691 // operation which will increase the per byte scanned cost (i.e. move 3692 // entries to/from the global stack). It basically tries to decrease the 3693 // scanning limit so that the clock is called earlier.
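// As an illustration of the arithmetic below (the numbers here are
// invented for this comment; they are not defaults taken from the
// code): if words_scanned_period were 12*1024 and
// _real_words_scanned_limit were 100000, then after this call
// _words_scanned_limit would be 100000 - 3*12288/4 = 90784, i.e. the
// next clock call would fire about three quarters of a scanning
// period earlier than it otherwise would.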
3694 3695 if (_cm->verbose_medium()) { 3696 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id); 3697 } 3698 3699 _words_scanned_limit = _real_words_scanned_limit - 3700 3 * words_scanned_period / 4; 3701 _refs_reached_limit = _real_refs_reached_limit - 3702 3 * refs_reached_period / 4; 3703 } 3704 3705 void CMTask::move_entries_to_global_stack() { 3706 // local array where we'll store the entries that will be popped 3707 // from the local queue 3708 oop buffer[global_stack_transfer_size]; 3709 3710 int n = 0; 3711 oop obj; 3712 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3713 buffer[n] = obj; 3714 ++n; 3715 } 3716 3717 if (n > 0) { 3718 // we popped at least one entry from the local queue 3719 3720 statsOnly( ++_global_transfers_to; _local_pops += n ); 3721 3722 if (!_cm->mark_stack_push(buffer, n)) { 3723 if (_cm->verbose_low()) { 3724 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow", 3725 _worker_id); 3726 } 3727 set_has_aborted(); 3728 } else { 3729 // the transfer was successful 3730 3731 if (_cm->verbose_medium()) { 3732 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack", 3733 _worker_id, n); 3734 } 3735 statsOnly( size_t tmp_size = _cm->mark_stack_size(); 3736 if (tmp_size > _global_max_size) { 3737 _global_max_size = tmp_size; 3738 } 3739 _global_pushes += n ); 3740 } 3741 } 3742 3743 // this operation was quite expensive, so decrease the limits 3744 decrease_limits(); 3745 } 3746 3747 void CMTask::get_entries_from_global_stack() { 3748 // local array where we'll store the entries that will be popped 3749 // from the global stack. 3750 oop buffer[global_stack_transfer_size]; 3751 int n; 3752 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3753 assert(n <= global_stack_transfer_size, 3754 "we should not pop more than the given limit"); 3755 if (n > 0) { 3756 // yes, we did actually pop at least one entry 3757 3758 statsOnly( ++_global_transfers_from; _global_pops += n ); 3759 if (_cm->verbose_medium()) { 3760 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack", 3761 _worker_id, n); 3762 } 3763 for (int i = 0; i < n; ++i) { 3764 bool success = _task_queue->push(buffer[i]); 3765 // We only call this when the local queue is empty or under a 3766 // given target limit. So, we do not expect this push to fail. 3767 assert(success, "invariant"); 3768 } 3769 3770 statsOnly( size_t tmp_size = (size_t)_task_queue->size(); 3771 if (tmp_size > _local_max_size) { 3772 _local_max_size = tmp_size; 3773 } 3774 _local_pushes += n ); 3775 } 3776 3777 // this operation was quite expensive, so decrease the limits 3778 decrease_limits(); 3779 } 3780 3781 void CMTask::drain_local_queue(bool partially) { 3782 if (has_aborted()) return; 3783 3784 // Decide what the target size is, depending on whether we're going to 3785 // drain it partially (so that other tasks can steal if they run out 3786 // of things to do) or totally (at the very end).
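// As a worked example of the computation below (values are
// illustrative assumptions, not necessarily this build's defaults):
// if _task_queue->max_elems() were 130000 and GCDrainStackTargetSize
// were 64, a partial drain would stop once the queue is down to
// MIN2(130000/3, 64) = 64 entries, while a total drain
// (partially == false) keeps popping until the queue is empty
// (target of 0).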
3787 size_t target_size; 3788 if (partially) { 3789 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3790 } else { 3791 target_size = 0; 3792 } 3793 3794 if (_task_queue->size() > target_size) { 3795 if (_cm->verbose_high()) { 3796 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT, 3797 _worker_id, target_size); 3798 } 3799 3800 oop obj; 3801 bool ret = _task_queue->pop_local(obj); 3802 while (ret) { 3803 statsOnly( ++_local_pops ); 3804 3805 if (_cm->verbose_high()) { 3806 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id, 3807 p2i((void*) obj)); 3808 } 3809 3810 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3811 assert(!_g1h->is_on_master_free_list( 3812 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3813 3814 scan_object(obj); 3815 3816 if (_task_queue->size() <= target_size || has_aborted()) { 3817 ret = false; 3818 } else { 3819 ret = _task_queue->pop_local(obj); 3820 } 3821 } 3822 3823 if (_cm->verbose_high()) { 3824 gclog_or_tty->print_cr("[%u] drained local queue, size = %u", 3825 _worker_id, _task_queue->size()); 3826 } 3827 } 3828 } 3829 3830 void CMTask::drain_global_stack(bool partially) { 3831 if (has_aborted()) return; 3832 3833 // We have a policy to drain the local queue before we attempt to 3834 // drain the global stack. 3835 assert(partially || _task_queue->size() == 0, "invariant"); 3836 3837 // Decide what the target size is, depending on whether we're going to 3838 // drain it partially (so that other tasks can steal if they run out 3839 // of things to do) or totally (at the very end). Notice that, 3840 // because we move entries from the global stack in chunks or 3841 // because another task might be doing the same, we might in fact 3842 // drop below the target. But, this is not a problem. 3843 size_t target_size; 3844 if (partially) { 3845 target_size = _cm->partial_mark_stack_size_target(); 3846 } else { 3847 target_size = 0; 3848 } 3849 3850 if (_cm->mark_stack_size() > target_size) { 3851 if (_cm->verbose_low()) { 3852 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT, 3853 _worker_id, target_size); 3854 } 3855 3856 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3857 get_entries_from_global_stack(); 3858 drain_local_queue(partially); 3859 } 3860 3861 if (_cm->verbose_low()) { 3862 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT, 3863 _worker_id, _cm->mark_stack_size()); 3864 } 3865 } 3866 } 3867 3868 // SATB Queue has several assumptions on whether to call the par or 3869 // non-par versions of the methods. This is why some of the code is 3870 // replicated. We should really get rid of the single-threaded version 3871 // of the code to simplify things. 3872 void CMTask::drain_satb_buffers() { 3873 if (has_aborted()) return; 3874 3875 // We set this so that the regular clock knows that we're in the 3876 // middle of draining buffers and doesn't set the abort flag when it 3877 // notices that SATB buffers are available for draining. It'd be 3878 // very counterproductive if it did that. :-) 3879 _draining_satb_buffers = true; 3880 3881 CMObjectClosure oc(this); 3882 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3883 satb_mq_set.set_closure(_worker_id, &oc); 3884 3885 // This keeps claiming and applying the closure to completed buffers 3886 // until we run out of buffers or we need to abort.
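// Sketch of the shape of the loop that follows (illustration only;
// claim_and_apply_one_buffer is an invented name standing in for
// apply_closure_to_completed_buffer(), which claims and processes
// exactly one completed buffer per successful call):
//
//   while (!aborted && claim_and_apply_one_buffer()) {
//     regular_clock_call(); // so a long backlog of buffers cannot
//   }                       // starve the abort/yield checks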
3887 while (!has_aborted() && 3888 satb_mq_set.apply_closure_to_completed_buffer(_worker_id)) { 3889 if (_cm->verbose_medium()) { 3890 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3891 } 3892 statsOnly( ++_satb_buffers_processed ); 3893 regular_clock_call(); 3894 } 3895 3896 _draining_satb_buffers = false; 3897 3898 assert(has_aborted() || 3899 concurrent() || 3900 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3901 3902 satb_mq_set.set_closure(_worker_id, NULL); 3903 3904 // again, this was a potentially expensive operation, decrease the 3905 // limits to get the regular clock call early 3906 decrease_limits(); 3907 } 3908 3909 void CMTask::print_stats() { 3910 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3911 _worker_id, _calls); 3912 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3913 _elapsed_time_ms, _termination_time_ms); 3914 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3915 _step_times_ms.num(), _step_times_ms.avg(), 3916 _step_times_ms.sd()); 3917 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3918 _step_times_ms.maximum(), _step_times_ms.sum()); 3919 3920 #if _MARKING_STATS_ 3921 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3922 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3923 _all_clock_intervals_ms.sd()); 3924 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3925 _all_clock_intervals_ms.maximum(), 3926 _all_clock_intervals_ms.sum()); 3927 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT, 3928 _clock_due_to_scanning, _clock_due_to_marking); 3929 gclog_or_tty->print_cr(" Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT, 3930 _objs_scanned, _objs_found_on_bitmap); 3931 gclog_or_tty->print_cr(" Local Queue: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT, 3932 _local_pushes, _local_pops, _local_max_size); 3933 gclog_or_tty->print_cr(" Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT, 3934 _global_pushes, _global_pops, _global_max_size); 3935 gclog_or_tty->print_cr(" transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT, 3936 _global_transfers_to,_global_transfers_from); 3937 gclog_or_tty->print_cr(" Regions: claimed = " SIZE_FORMAT, _regions_claimed); 3938 gclog_or_tty->print_cr(" SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed); 3939 gclog_or_tty->print_cr(" Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT, 3940 _steal_attempts, _steals); 3941 gclog_or_tty->print_cr(" Aborted: " SIZE_FORMAT ", due to", _aborted); 3942 gclog_or_tty->print_cr(" overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT, 3943 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3944 gclog_or_tty->print_cr(" time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT, 3945 _aborted_timed_out, _aborted_satb, _aborted_termination); 3946 #endif // _MARKING_STATS_ 3947 } 3948 3949 /***************************************************************************** 3950 3951 The do_marking_step(time_target_ms, ...) method is the building 3952 block of the parallel marking framework. 
It can be called in parallel 3953 with other invocations of do_marking_step() on different tasks 3954 (but only one per task, obviously) and concurrently with the 3955 mutator threads, or during remark, hence it eliminates the need 3956 for two versions of the code. When called during remark, it will 3957 pick up from where the task left off during the concurrent marking 3958 phase. Interestingly, tasks are claimable during evacuation 3959 pauses too, since do_marking_step() ensures that it aborts before 3960 it needs to yield. 3961 3962 The data structures that it uses to do marking work are the 3963 following: 3964 3965 (1) Marking Bitmap. If there are gray objects that appear only 3966 on the bitmap (this happens either when dealing with an overflow 3967 or when the initial marking phase has simply marked the roots 3968 and didn't push them on the stack), then tasks claim heap 3969 regions whose bitmap they then scan to find gray objects. A 3970 global finger indicates where the end of the last claimed region 3971 is. A local finger indicates how far into the region a task has 3972 scanned. The two fingers are used to determine how to gray an 3973 object (i.e. whether simply marking it is OK, as it will be 3974 visited by a task in the future, or whether it also needs to be 3975 pushed on a stack). 3976 3977 (2) Local Queue. The local queue of the task which is accessed 3978 reasonably efficiently by the task. Other tasks can steal from 3979 it when they run out of work. Throughout the marking phase, a 3980 task attempts to keep its local queue short but not totally 3981 empty, so that entries are available for stealing by other 3982 tasks. Only when there is no more work will a task totally 3983 drain its local queue. 3984 3985 (3) Global Mark Stack. This handles local queue overflow. During 3986 marking only sets of entries are moved between it and the local 3987 queues, as access to it requires a mutex and the finer-grain 3988 interaction with it might cause contention. If it 3989 overflows, then the marking phase should restart and iterate 3990 over the bitmap to identify gray objects. Throughout the marking 3991 phase, tasks attempt to keep the global mark stack at a small 3992 length but not totally empty, so that entries are available for 3993 popping by other tasks. Only when there is no more work will 3994 tasks totally drain the global mark stack. 3995 3996 (4) SATB Buffer Queue. This is where completed SATB buffers are 3997 made available. Buffers are regularly removed from this queue 3998 and scanned for roots, so that the queue doesn't get too 3999 long. During remark, all completed buffers are processed, as 4000 well as the filled-in parts of any uncompleted buffers. 4001 4002 The do_marking_step() method tries to abort when the time target 4003 has been reached. There are a few other cases when the 4004 do_marking_step() method also aborts: 4005 4006 (1) When the marking phase has been aborted (after a Full GC). 4007 4008 (2) When a global overflow (on the global stack) has been 4009 triggered. Before the task aborts, it will actually sync up with 4010 the other tasks to ensure that all the marking data structures 4011 (local queues, stacks, fingers etc.) are re-initialized so that 4012 when do_marking_step() completes, the marking phase can 4013 immediately restart. 4014 4015 (3) When enough completed SATB buffers are available. The 4016 do_marking_step() method only tries to drain SATB buffers right 4017 at the beginning.
So, if enough buffers are available, the 4018 marking step aborts and the SATB buffers are processed at 4019 the beginning of the next invocation. 4020 4021 (4) To yield. When we have to yield, we abort and do the yield 4022 right at the end of do_marking_step(). This saves us from a lot 4023 of hassle as, by yielding, we might allow a Full GC. If this 4024 happens then objects will be compacted underneath our feet, the 4025 heap might shrink, etc. We save checking for this by just 4026 aborting and doing the yield right at the end. 4027 4028 From the above it follows that the do_marking_step() method should 4029 be called in a loop (or, otherwise, regularly) until it completes. 4030 4031 If a marking step completes without its has_aborted() flag being 4032 true, it means it has completed the current marking phase (and 4033 also all other marking tasks have done so and have all synced up). 4034 4035 A method called regular_clock_call() is invoked "regularly" (in 4036 sub-ms intervals) throughout marking. It is this clock method that 4037 checks all the abort conditions which were mentioned above and 4038 decides when the task should abort. A work-based scheme is used to 4039 trigger this clock method: when the number of object words the 4040 marking phase has scanned or the number of references the marking 4041 phase has visited reaches a given limit. Additional invocations of 4042 the clock method have been planted in a few other strategic places 4043 too. The initial reason for the clock method was to avoid calling 4044 vtime too regularly, as it is quite expensive. So, once it was in 4045 place, it was natural to piggy-back all the other conditions on it 4046 too and not constantly check them throughout the code. 4047 4048 If do_termination is true then do_marking_step will enter its 4049 termination protocol. 4050 4051 The value of is_serial must be true when do_marking_step is being 4052 called serially (i.e. by the VMThread) and do_marking_step should 4053 skip any synchronization in the termination and overflow code. 4054 Examples include the serial remark code and the serial reference 4055 processing closures. 4056 4057 The value of is_serial must be false when do_marking_step is 4058 being called by any of the worker threads in a work gang. 4059 Examples include the concurrent marking code (CMMarkingTask), 4060 the MT remark code, and the MT reference processing closures. 4061 4062 *****************************************************************************/ 4063 4064 void CMTask::do_marking_step(double time_target_ms, 4065 bool do_termination, 4066 bool is_serial) { 4067 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 4068 assert(concurrent() == _cm->concurrent(), "they should be the same"); 4069 4070 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 4071 assert(_task_queues != NULL, "invariant"); 4072 assert(_task_queue != NULL, "invariant"); 4073 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 4074 4075 assert(!_claimed, 4076 "only one thread should claim this task at any one time"); 4077 4078 // OK, this doesn't safeguard against all possible scenarios, as it is 4079 // possible for two threads to set the _claimed flag at the same 4080 // time. But it is only for debugging purposes anyway and it will 4081 // catch most problems.
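// (If a hard guarantee were ever needed, one could claim the task
// with a compare-and-swap instead -- purely a hypothetical sketch,
// not what the code below does, and _claimed_word is an invented
// field name:
//
//   if (Atomic::cmpxchg(1, &_claimed_word, 0) != 0) {
//     return; // lost the race; another thread owns this task
//   }
//
// Since _claimed exists only to catch bugs, the plain flag suffices.)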
4082 _claimed = true; 4083 4084 _start_time_ms = os::elapsedVTime() * 1000.0; 4085 statsOnly( _interval_start_time_ms = _start_time_ms ); 4086 4087 // If do_stealing is true then do_marking_step will attempt to 4088 // steal work from the other CMTasks. It only makes sense to 4089 // enable stealing when the termination protocol is enabled 4090 // and do_marking_step() is not being called serially. 4091 bool do_stealing = do_termination && !is_serial; 4092 4093 double diff_prediction_ms = 4094 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 4095 _time_target_ms = time_target_ms - diff_prediction_ms; 4096 4097 // set up the variables that are used in the work-based scheme to 4098 // call the regular clock method 4099 _words_scanned = 0; 4100 _refs_reached = 0; 4101 recalculate_limits(); 4102 4103 // clear all flags 4104 clear_has_aborted(); 4105 _has_timed_out = false; 4106 _draining_satb_buffers = false; 4107 4108 ++_calls; 4109 4110 if (_cm->verbose_low()) { 4111 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, " 4112 "target = %1.2lfms >>>>>>>>>>", 4113 _worker_id, _calls, _time_target_ms); 4114 } 4115 4116 // Set up the bitmap and oop closures. Anything that uses them is 4117 // eventually called from this method, so it is OK to allocate these 4118 // statically. 4119 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 4120 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 4121 set_cm_oop_closure(&cm_oop_closure); 4122 4123 if (_cm->has_overflown()) { 4124 // This can happen if the mark stack overflows during a GC pause 4125 // and this task, after a yield point, restarts. We have to abort 4126 // as we need to get into the overflow protocol which happens 4127 // right at the end of this task. 4128 set_has_aborted(); 4129 } 4130 4131 // First drain any available SATB buffers. After this, we will not 4132 // look at SATB buffers before the next invocation of this method. 4133 // If enough completed SATB buffers are queued up, the regular clock 4134 // will abort this task so that it restarts. 4135 drain_satb_buffers(); 4136 // ...then partially drain the local queue and the global stack 4137 drain_local_queue(true); 4138 drain_global_stack(true); 4139 4140 do { 4141 if (!has_aborted() && _curr_region != NULL) { 4142 // This means that we're already holding on to a region. 4143 assert(_finger != NULL, "if region is not NULL, then the finger " 4144 "should not be NULL either"); 4145 4146 // We might have restarted this task after an evacuation pause 4147 // which might have evacuated the region we're holding on to 4148 // underneath our feet. Let's read its limit again to make sure 4149 // that we do not iterate over a region of the heap that 4150 // contains garbage (update_region_limit() will also move 4151 // _finger to the start of the region if it is found empty). 4152 update_region_limit(); 4153 // We will start from _finger not from the start of the region, 4154 // as we might be restarting this task after aborting half-way 4155 // through scanning this region. In this case, _finger points to 4156 // the address where we last found a marked object. If this is a 4157 // fresh region, _finger points to start(). 
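// For example (addresses invented for this comment): if the region
// spans [0x10000, 0x90000) and a previous invocation aborted with
// _finger at 0x48000, the MemRegion built below is
// [0x48000, _region_limit), so the bitmap walk resumes where we
// stopped instead of rescanning [0x10000, 0x48000).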
4158 MemRegion mr = MemRegion(_finger, _region_limit); 4159 4160 if (_cm->verbose_low()) { 4161 gclog_or_tty->print_cr("[%u] we're scanning part " 4162 "["PTR_FORMAT", "PTR_FORMAT") " 4163 "of region "HR_FORMAT, 4164 _worker_id, p2i(_finger), p2i(_region_limit), 4165 HR_FORMAT_PARAMS(_curr_region)); 4166 } 4167 4168 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 4169 "humongous regions should go around loop once only"); 4170 4171 // Some special cases: 4172 // If the memory region is empty, we can just give up the region. 4173 // If the current region is humongous then we only need to check 4174 // the bitmap for the bit associated with the start of the object, 4175 // scan the object if it's live, and give up the region. 4176 // Otherwise, let's iterate over the bitmap of the part of the region 4177 // that is left. 4178 // If the iteration is successful, give up the region. 4179 if (mr.is_empty()) { 4180 giveup_current_region(); 4181 regular_clock_call(); 4182 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 4183 if (_nextMarkBitMap->isMarked(mr.start())) { 4184 // The object is marked - apply the closure 4185 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4186 bitmap_closure.do_bit(offset); 4187 } 4188 // Even if this task aborted while scanning the humongous object 4189 // we can (and should) give up the current region. 4190 giveup_current_region(); 4191 regular_clock_call(); 4192 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4193 giveup_current_region(); 4194 regular_clock_call(); 4195 } else { 4196 assert(has_aborted(), "currently the only way to do so"); 4197 // The only way to abort the bitmap iteration is to return 4198 // false from the do_bit() method. However, inside the 4199 // do_bit() method we move the _finger to point to the 4200 // object currently being looked at. So, if we bail out, we 4201 // have definitely set _finger to something non-null. 4202 assert(_finger != NULL, "invariant"); 4203 4204 // Region iteration was actually aborted. So now _finger 4205 // points to the address of the object we last scanned. If we 4206 // leave it there, when we restart this task, we will rescan 4207 // the object. It is easy to avoid this. We move the finger by 4208 // enough to point to the next possible object header (the 4209 // bitmap knows by how much we need to move it as it knows its 4210 // granularity). 4211 assert(_finger < _region_limit, "invariant"); 4212 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4213 // Check if bitmap iteration was aborted while scanning the last object 4214 if (new_finger >= _region_limit) { 4215 giveup_current_region(); 4216 } else { 4217 move_finger_to(new_finger); 4218 } 4219 } 4220 } 4221 // At this point we have either completed iterating over the 4222 // region we were holding on to, or we have aborted. 4223 4224 // We then partially drain the local queue and the global stack. 4225 // (Do we really need this?) 4226 drain_local_queue(true); 4227 drain_global_stack(true); 4228 4229 // Read the note on the claim_region() method on why it might 4230 // return NULL with potentially more regions available for 4231 // claiming and why we have to check out_of_regions() to determine 4232 // whether we're done or not. 4233 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4234 // We are going to try to claim a new region. We should have 4235 // given up on the previous one. 
4236 // Separated the asserts so that we know which one fires. 4237 assert(_curr_region == NULL, "invariant"); 4238 assert(_finger == NULL, "invariant"); 4239 assert(_region_limit == NULL, "invariant"); 4240 if (_cm->verbose_low()) { 4241 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4242 } 4243 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4244 if (claimed_region != NULL) { 4245 // Yes, we managed to claim one 4246 statsOnly( ++_regions_claimed ); 4247 4248 if (_cm->verbose_low()) { 4249 gclog_or_tty->print_cr("[%u] we successfully claimed " 4250 "region "PTR_FORMAT, 4251 _worker_id, p2i(claimed_region)); 4252 } 4253 4254 setup_for_region(claimed_region); 4255 assert(_curr_region == claimed_region, "invariant"); 4256 } 4257 // It is important to call the regular clock here. It might take 4258 // a while to claim a region if, for example, we hit a large 4259 // block of empty regions. So we need to call the regular clock 4260 // method once round the loop to make sure it's called 4261 // frequently enough. 4262 regular_clock_call(); 4263 } 4264 4265 if (!has_aborted() && _curr_region == NULL) { 4266 assert(_cm->out_of_regions(), 4267 "at this point we should be out of regions"); 4268 } 4269 } while ( _curr_region != NULL && !has_aborted()); 4270 4271 if (!has_aborted()) { 4272 // We cannot check whether the global stack is empty, since other 4273 // tasks might be pushing objects to it concurrently. 4274 assert(_cm->out_of_regions(), 4275 "at this point we should be out of regions"); 4276 4277 if (_cm->verbose_low()) { 4278 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4279 } 4280 4281 // Try to reduce the number of available SATB buffers so that 4282 // remark has less work to do. 4283 drain_satb_buffers(); 4284 } 4285 4286 // Since we've done everything else, we can now totally drain the 4287 // local queue and global stack. 4288 drain_local_queue(false); 4289 drain_global_stack(false); 4290 4291 // Attempt at work stealing from other tasks' queues. 4292 if (do_stealing && !has_aborted()) { 4293 // We have not aborted. This means that we have finished all that 4294 // we could. Let's try to do some stealing... 4295 4296 // We cannot check whether the global stack is empty, since other 4297 // tasks might be pushing objects to it concurrently. 4298 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4299 "only way to reach here"); 4300 4301 if (_cm->verbose_low()) { 4302 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4303 } 4304 4305 while (!has_aborted()) { 4306 oop obj; 4307 statsOnly( ++_steal_attempts ); 4308 4309 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4310 if (_cm->verbose_medium()) { 4311 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4312 _worker_id, p2i((void*) obj)); 4313 } 4314 4315 statsOnly( ++_steals ); 4316 4317 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4318 "any stolen object should be marked"); 4319 scan_object(obj); 4320 4321 // And since we're towards the end, let's totally drain the 4322 // local queue and global stack. 4323 drain_local_queue(false); 4324 drain_global_stack(false); 4325 } else { 4326 break; 4327 } 4328 } 4329 } 4330 4331 // If we are about to wrap up and go into termination, check if we 4332 // should raise the overflow flag. 4333 if (do_termination && !has_aborted()) { 4334 if (_cm->force_overflow()->should_force()) { 4335 _cm->set_has_overflown(); 4336 regular_clock_call(); 4337 } 4338 } 4339 4340 // We still haven't aborted.
Now, let's try to get into the 4341 // termination protocol. 4342 if (do_termination && !has_aborted()) { 4343 // We cannot check whether the global stack is empty, since other 4344 // tasks might be concurrently pushing objects on it. 4345 // Separated the asserts so that we know which one fires. 4346 assert(_cm->out_of_regions(), "only way to reach here"); 4347 assert(_task_queue->size() == 0, "only way to reach here"); 4348 4349 if (_cm->verbose_low()) { 4350 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4351 } 4352 4353 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4354 4355 // The CMTask class also extends the TerminatorTerminator class, 4356 // hence its should_exit_termination() method will also decide 4357 // whether to exit the termination protocol or not. 4358 bool finished = (is_serial || 4359 _cm->terminator()->offer_termination(this)); 4360 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4361 _termination_time_ms += 4362 termination_end_time_ms - _termination_start_time_ms; 4363 4364 if (finished) { 4365 // We're all done. 4366 4367 if (_worker_id == 0) { 4368 // let's allow task 0 to do this 4369 if (concurrent()) { 4370 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4371 // we need to set this to false before the next 4372 // safepoint. This way we ensure that the marking phase 4373 // doesn't observe any more heap expansions. 4374 _cm->clear_concurrent_marking_in_progress(); 4375 } 4376 } 4377 4378 // We can now guarantee that the global stack is empty, since 4379 // all other tasks have finished. We separated the guarantees so 4380 // that, if a condition is false, we can immediately find out 4381 // which one. 4382 guarantee(_cm->out_of_regions(), "only way to reach here"); 4383 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4384 guarantee(_task_queue->size() == 0, "only way to reach here"); 4385 guarantee(!_cm->has_overflown(), "only way to reach here"); 4386 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4387 4388 if (_cm->verbose_low()) { 4389 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4390 } 4391 } else { 4392 // Apparently there's more work to do. Let's abort this task; the 4393 // caller will restart it and we can hopefully find more things to do. 4394 4395 if (_cm->verbose_low()) { 4396 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4397 _worker_id); 4398 } 4399 4400 set_has_aborted(); 4401 statsOnly( ++_aborted_termination ); 4402 } 4403 } 4404 4405 // Mainly for debugging purposes to make sure that a pointer to the 4406 // closure which was stack allocated in this frame doesn't 4407 // escape it by accident. 4408 set_cm_oop_closure(NULL); 4409 double end_time_ms = os::elapsedVTime() * 1000.0; 4410 double elapsed_time_ms = end_time_ms - _start_time_ms; 4411 // Update the step history. 4412 _step_times_ms.add(elapsed_time_ms); 4413 4414 if (has_aborted()) { 4415 // The task was aborted for some reason. 4416 4417 statsOnly( ++_aborted ); 4418 4419 if (_has_timed_out) { 4420 double diff_ms = elapsed_time_ms - _time_target_ms; 4421 // Keep statistics of how well we did with respect to hitting 4422 // our target only if we actually timed out (if we aborted for 4423 // other reasons, then the results might get skewed). 4424 _marking_step_diffs_ms.add(diff_ms); 4425 } 4426 4427 if (_cm->has_overflown()) { 4428 // This is the interesting one. We aborted because a global 4429 // overflow was raised.
This means we have to restart the 4430 // marking phase and start iterating over regions. However, in 4431 // order to do this we have to make sure that all tasks stop 4432 // what they are doing and re-initialize in a safe manner. We 4433 // will achieve this with the use of two barrier sync points. 4434 4435 if (_cm->verbose_low()) { 4436 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4437 } 4438 4439 if (!is_serial) { 4440 // We only need to enter the sync barrier if being called 4441 // from a parallel context 4442 _cm->enter_first_sync_barrier(_worker_id); 4443 4444 // When we exit this sync barrier we know that all tasks have 4445 // stopped doing marking work. So, it's now safe to 4446 // re-initialize our data structures. At the end of this method, 4447 // task 0 will clear the global data structures. 4448 } 4449 4450 statsOnly( ++_aborted_overflow ); 4451 4452 // We clear the local state of this task... 4453 clear_region_fields(); 4454 4455 if (!is_serial) { 4456 // ...and enter the second barrier. 4457 _cm->enter_second_sync_barrier(_worker_id); 4458 } 4459 // At this point, if we're in the concurrent phase of 4460 // marking, everything has been re-initialized and we're 4461 // ready to restart. 4462 } 4463 4464 if (_cm->verbose_low()) { 4465 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4466 "elapsed = %1.2lfms <<<<<<<<<<", 4467 _worker_id, _time_target_ms, elapsed_time_ms); 4468 if (_cm->has_aborted()) { 4469 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4470 _worker_id); 4471 } 4472 } 4473 } else { 4474 if (_cm->verbose_low()) { 4475 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4476 "elapsed = %1.2lfms <<<<<<<<<<", 4477 _worker_id, _time_target_ms, elapsed_time_ms); 4478 } 4479 } 4480 4481 _claimed = false; 4482 } 4483 4484 CMTask::CMTask(uint worker_id, 4485 ConcurrentMark* cm, 4486 size_t* marked_bytes, 4487 BitMap* card_bm, 4488 CMTaskQueue* task_queue, 4489 CMTaskQueueSet* task_queues) 4490 : _g1h(G1CollectedHeap::heap()), 4491 _worker_id(worker_id), _cm(cm), 4492 _claimed(false), 4493 _nextMarkBitMap(NULL), _hash_seed(17), 4494 _task_queue(task_queue), 4495 _task_queues(task_queues), 4496 _cm_oop_closure(NULL), 4497 _marked_bytes_array(marked_bytes), 4498 _card_bm(card_bm) { 4499 guarantee(task_queue != NULL, "invariant"); 4500 guarantee(task_queues != NULL, "invariant"); 4501 4502 statsOnly( _clock_due_to_scanning = 0; 4503 _clock_due_to_marking = 0 ); 4504 4505 _marking_step_diffs_ms.add(0.5); 4506 } 4507 4508 // These are formatting macros that are used below to ensure 4509 // consistent formatting. The *_H_* versions are used to format the 4510 // header for a particular value and they should be kept consistent 4511 // with the corresponding macro. Also note that most of the macros add 4512 // the necessary white space (as a prefix) which makes them a bit 4513 // easier to compose. 4514 4515 // All the output lines are prefixed with this string to be able to 4516 // identify them easily in a large log file.
4517 #define G1PPRL_LINE_PREFIX "###" 4518 4519 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4520 #ifdef _LP64 4521 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4522 #else // _LP64 4523 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4524 #endif // _LP64 4525 4526 // For per-region info 4527 #define G1PPRL_TYPE_FORMAT " %-4s" 4528 #define G1PPRL_TYPE_H_FORMAT " %4s" 4529 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4530 #define G1PPRL_BYTE_H_FORMAT " %9s" 4531 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4532 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4533 4534 // For summary info 4535 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4536 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4537 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4538 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4539 4540 G1PrintRegionLivenessInfoClosure:: 4541 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4542 : _out(out), 4543 _total_used_bytes(0), _total_capacity_bytes(0), 4544 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4545 _hum_used_bytes(0), _hum_capacity_bytes(0), 4546 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4547 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4548 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4549 MemRegion g1_reserved = g1h->g1_reserved(); 4550 double now = os::elapsedTime(); 4551 4552 // Print the header of the output. 4553 _out->cr(); 4554 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 4555 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 4556 G1PPRL_SUM_ADDR_FORMAT("reserved") 4557 G1PPRL_SUM_BYTE_FORMAT("region-size"), 4558 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 4559 HeapRegion::GrainBytes); 4560 _out->print_cr(G1PPRL_LINE_PREFIX); 4561 _out->print_cr(G1PPRL_LINE_PREFIX 4562 G1PPRL_TYPE_H_FORMAT 4563 G1PPRL_ADDR_BASE_H_FORMAT 4564 G1PPRL_BYTE_H_FORMAT 4565 G1PPRL_BYTE_H_FORMAT 4566 G1PPRL_BYTE_H_FORMAT 4567 G1PPRL_DOUBLE_H_FORMAT 4568 G1PPRL_BYTE_H_FORMAT 4569 G1PPRL_BYTE_H_FORMAT, 4570 "type", "address-range", 4571 "used", "prev-live", "next-live", "gc-eff", 4572 "remset", "code-roots"); 4573 _out->print_cr(G1PPRL_LINE_PREFIX 4574 G1PPRL_TYPE_H_FORMAT 4575 G1PPRL_ADDR_BASE_H_FORMAT 4576 G1PPRL_BYTE_H_FORMAT 4577 G1PPRL_BYTE_H_FORMAT 4578 G1PPRL_BYTE_H_FORMAT 4579 G1PPRL_DOUBLE_H_FORMAT 4580 G1PPRL_BYTE_H_FORMAT 4581 G1PPRL_BYTE_H_FORMAT, 4582 "", "", 4583 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 4584 "(bytes)", "(bytes)"); 4585 } 4586 4587 // It takes as a parameter a reference to one of the _hum_* fields, 4588 // deduces the corresponding value for a region in a humongous region 4589 // series (either the region size, or what's left if the _hum_* field 4590 // is < the region size), and updates the _hum_* field accordingly. 4591 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 4592 size_t bytes = 0; 4593 // The > 0 check is to deal with the prev and next live bytes which 4594 // could be 0. 4595 if (*hum_bytes > 0) { 4596 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes); 4597 *hum_bytes -= bytes; 4598 } 4599 return bytes; 4600 } 4601 4602 // It deduces the values for a region in a humongous region series 4603 // from the _hum_* fields and updates those accordingly. It assumes 4604 // that the _hum_* fields have already been set up from the "starts 4605 // humongous" region and that we visit the regions in address order.
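// For example (sizes invented for this comment): with 1 MB regions
// and a humongous series whose _hum_used_bytes was primed to 2.5 MB
// by the "starts humongous" region, three successive calls hand back
// 1 MB, 1 MB and 0.5 MB respectively, leaving the field at zero for
// the next series.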
4606 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4607 size_t* capacity_bytes, 4608 size_t* prev_live_bytes, 4609 size_t* next_live_bytes) { 4610 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4611 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4612 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4613 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4614 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4615 } 4616 4617 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4618 const char* type = r->get_type_str(); 4619 HeapWord* bottom = r->bottom(); 4620 HeapWord* end = r->end(); 4621 size_t capacity_bytes = r->capacity(); 4622 size_t used_bytes = r->used(); 4623 size_t prev_live_bytes = r->live_bytes(); 4624 size_t next_live_bytes = r->next_live_bytes(); 4625 double gc_eff = r->gc_efficiency(); 4626 size_t remset_bytes = r->rem_set()->mem_size(); 4627 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 4628 4629 if (r->is_starts_humongous()) { 4630 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4631 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4632 "they should have been zeroed after the last time we used them"); 4633 // Set up the _hum_* fields. 4634 _hum_capacity_bytes = capacity_bytes; 4635 _hum_used_bytes = used_bytes; 4636 _hum_prev_live_bytes = prev_live_bytes; 4637 _hum_next_live_bytes = next_live_bytes; 4638 get_hum_bytes(&used_bytes, &capacity_bytes, 4639 &prev_live_bytes, &next_live_bytes); 4640 end = bottom + HeapRegion::GrainWords; 4641 } else if (r->is_continues_humongous()) { 4642 get_hum_bytes(&used_bytes, &capacity_bytes, 4643 &prev_live_bytes, &next_live_bytes); 4644 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4645 } 4646 4647 _total_used_bytes += used_bytes; 4648 _total_capacity_bytes += capacity_bytes; 4649 _total_prev_live_bytes += prev_live_bytes; 4650 _total_next_live_bytes += next_live_bytes; 4651 _total_remset_bytes += remset_bytes; 4652 _total_strong_code_roots_bytes += strong_code_roots_bytes; 4653 4654 // Print a line for this particular region. 4655 _out->print_cr(G1PPRL_LINE_PREFIX 4656 G1PPRL_TYPE_FORMAT 4657 G1PPRL_ADDR_BASE_FORMAT 4658 G1PPRL_BYTE_FORMAT 4659 G1PPRL_BYTE_FORMAT 4660 G1PPRL_BYTE_FORMAT 4661 G1PPRL_DOUBLE_FORMAT 4662 G1PPRL_BYTE_FORMAT 4663 G1PPRL_BYTE_FORMAT, 4664 type, p2i(bottom), p2i(end), 4665 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 4666 remset_bytes, strong_code_roots_bytes); 4667 4668 return false; 4669 } 4670 4671 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4672 // add static memory usages to remembered set sizes 4673 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 4674 // Print the footer of the output. 
4675 _out->print_cr(G1PPRL_LINE_PREFIX); 4676 _out->print_cr(G1PPRL_LINE_PREFIX 4677 " SUMMARY" 4678 G1PPRL_SUM_MB_FORMAT("capacity") 4679 G1PPRL_SUM_MB_PERC_FORMAT("used") 4680 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4681 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4682 G1PPRL_SUM_MB_FORMAT("remset") 4683 G1PPRL_SUM_MB_FORMAT("code-roots"), 4684 bytes_to_mb(_total_capacity_bytes), 4685 bytes_to_mb(_total_used_bytes), 4686 perc(_total_used_bytes, _total_capacity_bytes), 4687 bytes_to_mb(_total_prev_live_bytes), 4688 perc(_total_prev_live_bytes, _total_capacity_bytes), 4689 bytes_to_mb(_total_next_live_bytes), 4690 perc(_total_next_live_bytes, _total_capacity_bytes), 4691 bytes_to_mb(_total_remset_bytes), 4692 bytes_to_mb(_total_strong_code_roots_bytes)); 4693 _out->cr(); 4694 }
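// For reference, a minimal sketch of how this closure is typically
// driven (an illustration under the usual HeapRegionClosure iteration
// protocol, not code the VM necessarily executes in this exact form):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the table header, each doHeapRegion() call
// prints one per-region line, and the destructor prints the summary
// footer above.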