/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionManager.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSet.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/allocation.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return heap_size / mark_distance();
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions) {
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
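  // Clearing is done in fixed-size chunks (see chunk_size_in_words in
  // doHeapRegion() below) rather than in a single clearRange() call per
  // region, so that a concurrent caller gets regular opportunities to
  // yield to a pending safepoint between chunks.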
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

void CMBitMap::clearAll() {
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  G1CollectedHeap::heap()->heap_region_iterate(&cl);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
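    // (Another thread won the cmpxchg race, so the index we read is
    // stale; loop around, re-read _index and retry the claim.)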
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
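  // claim_next() walks the survivor list from first_survivor_region() to
  // last_survivor_region() under the RootRegionScan_lock, handing out one
  // region per call.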
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
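      // scale_parallel_threads() returns (n + 2) / 4, i.e. roughly a
      // quarter of the parallel GC threads, but always at least one.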
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
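    // Only values the user set explicitly need checking here; the branches
    // below distinguish whether MarkStackSizeMax was left at its default or
    // was also given on the command line.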
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  g1h->heap_region_iterate(&cl);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    return _bitmap->getNextMarkedWordAddress(r->bottom(), r->end()) != r->end();
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that it is suspended for a Full GC or that an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
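// The closure walks the next marking bitmap between [bottom, NTAMS) of each
// region, summing the sizes of the marked objects found there and setting
// the card bits they span; VerifyLiveObjectDataHRClosure (below) compares
// the recomputed values against the region's next_marked_bytes() and the
// aggregated counting bitmaps.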
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;     // Region BM to be verified
  BitMap* _card_bm;       // Card BM to be verified
  bool _verbose;          // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
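    // (A set bit in the actual bitmap without a matching expected bit is
    // not treated as a failure here.)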
1609 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index(); 1610 1611 bool expected = _exp_region_bm->at(index); 1612 bool actual = _region_bm->at(index); 1613 if (expected && !actual) { 1614 if (_verbose) { 1615 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1616 "expected: %s, actual: %s", 1617 hr->hrm_index(), 1618 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1619 } 1620 failures += 1; 1621 } 1622 1623 // Verify that the card bit maps for the cards spanned by the current 1624 // region match. We have an error if we have a set bit in the expected 1625 // bit map and the corresponding bit in the actual bitmap is not set. 1626 1627 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1628 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1629 1630 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1631 expected = _exp_card_bm->at(i); 1632 actual = _card_bm->at(i); 1633 1634 if (expected && !actual) { 1635 if (_verbose) { 1636 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1637 "expected: %s, actual: %s", 1638 hr->hrm_index(), i, 1639 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1640 } 1641 failures += 1; 1642 } 1643 } 1644 1645 if (failures > 0 && _verbose) { 1646 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1647 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1648 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), 1649 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1650 } 1651 1652 _failures += failures; 1653 1654 // We could stop iteration over the heap when we 1655 // find the first violating region by returning true. 1656 return false; 1657 } 1658 }; 1659 1660 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1661 protected: 1662 G1CollectedHeap* _g1h; 1663 ConcurrentMark* _cm; 1664 BitMap* _actual_region_bm; 1665 BitMap* _actual_card_bm; 1666 1667 uint _n_workers; 1668 1669 BitMap* _expected_region_bm; 1670 BitMap* _expected_card_bm; 1671 1672 int _failures; 1673 bool _verbose; 1674 1675 public: 1676 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1677 BitMap* region_bm, BitMap* card_bm, 1678 BitMap* expected_region_bm, BitMap* expected_card_bm) 1679 : AbstractGangTask("G1 verify final counting"), 1680 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1681 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1682 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1683 _failures(0), _verbose(false), 1684 _n_workers(0) { 1685 assert(VerifyDuringGC, "don't call this otherwise"); 1686 1687 // Use the value already set as the number of active threads 1688 // in the call to run_task(). 
1689 if (G1CollectedHeap::use_parallel_gc_threads()) { 1690 assert( _g1h->workers()->active_workers() > 0, 1691 "Should have been previously set"); 1692 _n_workers = _g1h->workers()->active_workers(); 1693 } else { 1694 _n_workers = 1; 1695 } 1696 1697 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1698 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1699 1700 _verbose = _cm->verbose_medium(); 1701 } 1702 1703 void work(uint worker_id) { 1704 assert(worker_id < _n_workers, "invariant"); 1705 1706 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1707 _actual_region_bm, _actual_card_bm, 1708 _expected_region_bm, 1709 _expected_card_bm, 1710 _verbose); 1711 1712 if (G1CollectedHeap::use_parallel_gc_threads()) { 1713 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1714 worker_id, 1715 _n_workers, 1716 HeapRegion::VerifyCountClaimValue); 1717 } else { 1718 _g1h->heap_region_iterate(&verify_cl); 1719 } 1720 1721 Atomic::add(verify_cl.failures(), &_failures); 1722 } 1723 1724 int failures() const { return _failures; } 1725 }; 1726 1727 // Closure that finalizes the liveness counting data. 1728 // Used during the cleanup pause. 1729 // Sets the bits corresponding to the interval [NTAMS, top] 1730 // (which contains the implicitly live objects) in the 1731 // card liveness bitmap. Also sets the bit for each region, 1732 // containing live data, in the region liveness bitmap. 1733 1734 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1735 public: 1736 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1737 BitMap* region_bm, 1738 BitMap* card_bm) : 1739 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1740 1741 bool doHeapRegion(HeapRegion* hr) { 1742 1743 if (hr->continuesHumongous()) { 1744 // We will ignore these here and process them when their 1745 // associated "starts humongous" region is processed (see 1746 // set_bit_for_heap_region()). Note that we cannot rely on their 1747 // associated "starts humongous" region to have their bit set to 1748 // 1 since, due to the region chunking in the parallel region 1749 // iteration, a "continues humongous" region might be visited 1750 // before its associated "starts humongous". 1751 return false; 1752 } 1753 1754 HeapWord* ntams = hr->next_top_at_mark_start(); 1755 HeapWord* top = hr->top(); 1756 1757 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1758 1759 // Mark the allocated-since-marking portion... 1760 if (ntams < top) { 1761 // This definitely means the region has live objects. 1762 set_bit_for_region(hr); 1763 1764 // Now set the bits in the card bitmap for [ntams, top) 1765 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1766 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1767 1768 // Note: if we're looking at the last region in heap - top 1769 // could be actually just beyond the end of the heap; end_idx 1770 // will then correspond to a (non-existent) card that is also 1771 // just beyond the heap. 
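// The rounding below is easier to see with concrete numbers. Assuming
// 512-byte cards (the usual default, but an assumption in this sketch), an
// interval [base + 100, base + 1200) spans cards 0, 1 and 2: the index of
// the start is 0, the index of the (unaligned) end is 2, and the increment
// yields the half-open card range [0, 3). Illustrative sketch, not compiled:
#if 0
  #include <cassert>
  #include <cstddef>
  const size_t kCardSize = 512;  // bytes per card (assumption)
  size_t card_index(size_t heap_offset_bytes) {
    return heap_offset_bytes / kCardSize;
  }
  void example() {
    size_t start_idx = card_index(100);        // == 0
    size_t end_idx   = card_index(1200);       // == 2
    if ((1200 % kCardSize) != 0) end_idx += 1; // unaligned end: cover the tail card
    assert(start_idx == 0 && end_idx == 3);    // cards 0, 1 and 2
  }
#endif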
1772 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1773 // end of object is not card aligned - increment to cover 1774 // all the cards spanned by the object 1775 end_idx += 1; 1776 } 1777 1778 assert(end_idx <= _card_bm->size(), 1779 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1780 end_idx, _card_bm->size())); 1781 assert(start_idx < _card_bm->size(), 1782 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1783 start_idx, _card_bm->size())); 1784 1785 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1786 } 1787 1788 // Set the bit for the region if it contains live data 1789 if (hr->next_marked_bytes() > 0) { 1790 set_bit_for_region(hr); 1791 } 1792 1793 return false; 1794 } 1795 }; 1796 1797 class G1ParFinalCountTask: public AbstractGangTask { 1798 protected: 1799 G1CollectedHeap* _g1h; 1800 ConcurrentMark* _cm; 1801 BitMap* _actual_region_bm; 1802 BitMap* _actual_card_bm; 1803 1804 uint _n_workers; 1805 1806 public: 1807 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1808 : AbstractGangTask("G1 final counting"), 1809 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1810 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1811 _n_workers(0) { 1812 // Use the value already set as the number of active threads 1813 // in the call to run_task(). 1814 if (G1CollectedHeap::use_parallel_gc_threads()) { 1815 assert( _g1h->workers()->active_workers() > 0, 1816 "Should have been previously set"); 1817 _n_workers = _g1h->workers()->active_workers(); 1818 } else { 1819 _n_workers = 1; 1820 } 1821 } 1822 1823 void work(uint worker_id) { 1824 assert(worker_id < _n_workers, "invariant"); 1825 1826 FinalCountDataUpdateClosure final_update_cl(_g1h, 1827 _actual_region_bm, 1828 _actual_card_bm); 1829 1830 if (G1CollectedHeap::use_parallel_gc_threads()) { 1831 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1832 worker_id, 1833 _n_workers, 1834 HeapRegion::FinalCountClaimValue); 1835 } else { 1836 _g1h->heap_region_iterate(&final_update_cl); 1837 } 1838 } 1839 }; 1840 1841 class G1ParNoteEndTask; 1842 1843 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1844 G1CollectedHeap* _g1; 1845 size_t _max_live_bytes; 1846 uint _regions_claimed; 1847 size_t _freed_bytes; 1848 FreeRegionList* _local_cleanup_list; 1849 HeapRegionSetCount _old_regions_removed; 1850 HeapRegionSetCount _humongous_regions_removed; 1851 HRRSCleanupTask* _hrrs_cleanup_task; 1852 double _claimed_region_time; 1853 double _max_region_time; 1854 1855 public: 1856 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1857 FreeRegionList* local_cleanup_list, 1858 HRRSCleanupTask* hrrs_cleanup_task) : 1859 _g1(g1), 1860 _max_live_bytes(0), _regions_claimed(0), 1861 _freed_bytes(0), 1862 _claimed_region_time(0.0), _max_region_time(0.0), 1863 _local_cleanup_list(local_cleanup_list), 1864 _old_regions_removed(), 1865 _humongous_regions_removed(), 1866 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1867 1868 size_t freed_bytes() { return _freed_bytes; } 1869 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1870 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1871 1872 bool doHeapRegion(HeapRegion *hr) { 1873 if (hr->continuesHumongous()) { 1874 return false; 1875 } 1876 // We use a claim value of zero here because all regions 1877 // were claimed with value 1 in the FinalCount task. 
1878 _g1->reset_gc_time_stamps(hr); 1879 double start = os::elapsedTime(); 1880 _regions_claimed++; 1881 hr->note_end_of_marking(); 1882 _max_live_bytes += hr->max_live_bytes(); 1883 1884 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1885 _freed_bytes += hr->used(); 1886 hr->set_containing_set(NULL); 1887 if (hr->isHumongous()) { 1888 assert(hr->startsHumongous(), "we should only see starts humongous"); 1889 _humongous_regions_removed.increment(1u, hr->capacity()); 1890 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1891 } else { 1892 _old_regions_removed.increment(1u, hr->capacity()); 1893 _g1->free_region(hr, _local_cleanup_list, true); 1894 } 1895 } else { 1896 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1897 } 1898 1899 double region_time = (os::elapsedTime() - start); 1900 _claimed_region_time += region_time; 1901 if (region_time > _max_region_time) { 1902 _max_region_time = region_time; 1903 } 1904 return false; 1905 } 1906 1907 size_t max_live_bytes() { return _max_live_bytes; } 1908 uint regions_claimed() { return _regions_claimed; } 1909 double claimed_region_time_sec() { return _claimed_region_time; } 1910 double max_region_time_sec() { return _max_region_time; } 1911 }; 1912 1913 class G1ParNoteEndTask: public AbstractGangTask { 1914 friend class G1NoteEndOfConcMarkClosure; 1915 1916 protected: 1917 G1CollectedHeap* _g1h; 1918 size_t _max_live_bytes; 1919 size_t _freed_bytes; 1920 FreeRegionList* _cleanup_list; 1921 1922 public: 1923 G1ParNoteEndTask(G1CollectedHeap* g1h, 1924 FreeRegionList* cleanup_list) : 1925 AbstractGangTask("G1 note end"), _g1h(g1h), 1926 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1927 1928 void work(uint worker_id) { 1929 double start = os::elapsedTime(); 1930 FreeRegionList local_cleanup_list("Local Cleanup List"); 1931 HRRSCleanupTask hrrs_cleanup_task; 1932 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1933 &hrrs_cleanup_task); 1934 if (G1CollectedHeap::use_parallel_gc_threads()) { 1935 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1936 _g1h->workers()->active_workers(), 1937 HeapRegion::NoteEndClaimValue); 1938 } else { 1939 _g1h->heap_region_iterate(&g1_note_end); 1940 } 1941 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1942 1943 // Now update the lists 1944 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1945 { 1946 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1947 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1948 _max_live_bytes += g1_note_end.max_live_bytes(); 1949 _freed_bytes += g1_note_end.freed_bytes(); 1950 1951 // If we iterate over the global cleanup list at the end of 1952 // cleanup to do this printing we will not guarantee to only 1953 // generate output for the newly-reclaimed regions (the list 1954 // might not be empty at the beginning of cleanup; we might 1955 // still be working on its previous contents). So we do the 1956 // printing here, before we append the new regions to the global 1957 // cleanup list. 
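// The same accumulate-locally-then-append idiom in standalone form
// (illustrative only, not compiled; std::list stands in for FreeRegionList):
#if 0
  #include <list>
  void publish(std::list<int>& local_list, std::list<int>& global_list) {
    // per-element printing would happen here, while the list is still local
    global_list.splice(global_list.end(), local_list); // O(1); leaves local_list empty
  }
#endif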
1958 1959 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1960 if (hr_printer->is_active()) { 1961 FreeRegionListIterator iter(&local_cleanup_list); 1962 while (iter.more_available()) { 1963 HeapRegion* hr = iter.get_next(); 1964 hr_printer->cleanup(hr); 1965 } 1966 } 1967 1968 _cleanup_list->add_ordered(&local_cleanup_list); 1969 assert(local_cleanup_list.is_empty(), "post-condition"); 1970 1971 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1972 } 1973 } 1974 size_t max_live_bytes() { return _max_live_bytes; } 1975 size_t freed_bytes() { return _freed_bytes; } 1976 }; 1977 1978 class G1ParScrubRemSetTask: public AbstractGangTask { 1979 protected: 1980 G1RemSet* _g1rs; 1981 BitMap* _region_bm; 1982 BitMap* _card_bm; 1983 public: 1984 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1985 BitMap* region_bm, BitMap* card_bm) : 1986 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1987 _region_bm(region_bm), _card_bm(card_bm) { } 1988 1989 void work(uint worker_id) { 1990 if (G1CollectedHeap::use_parallel_gc_threads()) { 1991 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1992 HeapRegion::ScrubRemSetClaimValue); 1993 } else { 1994 _g1rs->scrub(_region_bm, _card_bm); 1995 } 1996 } 1997 1998 }; 1999 2000 void ConcurrentMark::cleanup() { 2001 // world is stopped at this checkpoint 2002 assert(SafepointSynchronize::is_at_safepoint(), 2003 "world should be stopped"); 2004 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2005 2006 // If a full collection has happened, we shouldn't do this. 2007 if (has_aborted()) { 2008 g1h->set_marking_complete(); // So bitmap clearing isn't confused 2009 return; 2010 } 2011 2012 g1h->verify_region_sets_optional(); 2013 2014 if (VerifyDuringGC) { 2015 HandleMark hm; // handle scope 2016 Universe::heap()->prepare_for_verify(); 2017 Universe::verify(VerifyOption_G1UsePrevMarking, 2018 " VerifyDuringGC:(before)"); 2019 } 2020 g1h->check_bitmaps("Cleanup Start"); 2021 2022 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2023 g1p->record_concurrent_mark_cleanup_start(); 2024 2025 double start = os::elapsedTime(); 2026 2027 HeapRegionRemSet::reset_for_cleanup_tasks(); 2028 2029 uint n_workers; 2030 2031 // Do counting once more with the world stopped for good measure. 2032 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2033 2034 if (G1CollectedHeap::use_parallel_gc_threads()) { 2035 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2036 "sanity check"); 2037 2038 g1h->set_par_threads(); 2039 n_workers = g1h->n_par_threads(); 2040 assert(g1h->n_par_threads() == n_workers, 2041 "Should not have been reset"); 2042 g1h->workers()->run_task(&g1_par_count_task); 2043 // Done with the parallel phase so reset to 0. 2044 g1h->set_par_threads(0); 2045 2046 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2047 "sanity check"); 2048 } else { 2049 n_workers = 1; 2050 g1_par_count_task.work(0); 2051 } 2052 2053 if (VerifyDuringGC) { 2054 // Verify that the counting data accumulated during marking matches 2055 // that calculated by walking the marking bitmap. 
2056 2057 // Bitmaps to hold expected values 2058 BitMap expected_region_bm(_region_bm.size(), true); 2059 BitMap expected_card_bm(_card_bm.size(), true); 2060
2061 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2062 &_region_bm, 2063 &_card_bm, 2064 &expected_region_bm, 2065 &expected_card_bm); 2066
2067 if (G1CollectedHeap::use_parallel_gc_threads()) { 2068 g1h->set_par_threads((int)n_workers); 2069 g1h->workers()->run_task(&g1_par_verify_task); 2070 // Done with the parallel phase so reset to 0. 2071 g1h->set_par_threads(0); 2072
2073 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2074 "sanity check"); 2075 } else { 2076 g1_par_verify_task.work(0); 2077 } 2078
2079 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2080 } 2081
2082 size_t start_used_bytes = g1h->used(); 2083 g1h->set_marking_complete(); 2084
2085 double count_end = os::elapsedTime(); 2086 double this_final_counting_time = (count_end - start); 2087 _total_counting_time += this_final_counting_time; 2088
2089 if (G1PrintRegionLivenessInfo) { 2090 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2091 _g1h->heap_region_iterate(&cl); 2092 } 2093
2094 // Install newly created mark bitmap as "prev". 2095 swapMarkBitMaps(); 2096
2097 g1h->reset_gc_time_stamp(); 2098
2099 // Note end of marking in all heap regions. 2100 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2101 if (G1CollectedHeap::use_parallel_gc_threads()) { 2102 g1h->set_par_threads((int)n_workers); 2103 g1h->workers()->run_task(&g1_par_note_end_task); 2104 g1h->set_par_threads(0); 2105
2106 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2107 "sanity check"); 2108 } else { 2109 g1_par_note_end_task.work(0); 2110 } 2111 g1h->check_gc_time_stamps(); 2112
2113 if (!cleanup_list_is_empty()) { 2114 // The cleanup list is not empty, so we'll have to process it 2115 // concurrently. Notify anyone else that might be wanting free 2116 // regions that there will be more free regions coming soon. 2117 g1h->set_free_regions_coming(); 2118 } 2119
2120 // Scrub the remembered sets (if enabled) before the record_concurrent_mark_cleanup_end() call below, since scrubbing affects the metric by which we sort the heap 2121 // regions. 2122 if (G1ScrubRemSets) { 2123 double rs_scrub_start = os::elapsedTime(); 2124 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2125 if (G1CollectedHeap::use_parallel_gc_threads()) { 2126 g1h->set_par_threads((int)n_workers); 2127 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2128 g1h->set_par_threads(0); 2129
2130 assert(g1h->check_heap_region_claim_values( 2131 HeapRegion::ScrubRemSetClaimValue), 2132 "sanity check"); 2133 } else { 2134 g1_par_scrub_rs_task.work(0); 2135 } 2136
2137 double rs_scrub_end = os::elapsedTime(); 2138 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2139 _total_rs_scrub_time += this_rs_scrub_time; 2140 } 2141
2142 // this will also free any regions totally full of garbage objects, 2143 // and sort the regions. 2144 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2145
2146 // Statistics. 2147 double end = os::elapsedTime(); 2148 _cleanup_times.add((end - start) * 1000.0); 2149
2150 if (G1Log::fine()) { 2151 g1h->print_size_transition(gclog_or_tty, 2152 start_used_bytes, 2153 g1h->used(), 2154 g1h->capacity()); 2155 } 2156
2157 // Clean up will have freed any regions completely full of garbage. 2158 // Update the soft reference policy with the new heap occupancy.
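// For reference, the kind of clearing rule this occupancy update feeds
// (modeled on the VM's LRU-max-heap soft reference policy; the names below
// are illustrative, see SoftRefLRUPolicyMSPerMB):
#if 0
  // A softly reachable object may be cleared once it has been unused for
  // longer than free_heap_mb * ms_per_mb milliseconds.
  bool soft_ref_clearable(jlong now_ms, jlong last_access_ms,
                          size_t free_heap_mb, jlong ms_per_mb) {
    return (now_ms - last_access_ms) > (jlong)free_heap_mb * ms_per_mb;
  }
#endif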
2159 Universe::update_heap_info_at_gc(); 2160
2161 if (VerifyDuringGC) { 2162 HandleMark hm; // handle scope 2163 Universe::heap()->prepare_for_verify(); 2164 Universe::verify(VerifyOption_G1UsePrevMarking, 2165 " VerifyDuringGC:(after)"); 2166 } 2167
2168 g1h->check_bitmaps("Cleanup End"); 2169
2170 g1h->verify_region_sets_optional(); 2171
2172 // We need to make this be a "collection" so any collection pause that 2173 // races with it goes around and waits for completeCleanup to finish. 2174 g1h->increment_total_collections(); 2175
2176 // Clean out dead classes and update Metaspace sizes. 2177 if (ClassUnloadingWithConcurrentMark) { 2178 ClassLoaderDataGraph::purge(); 2179 } 2180 MetaspaceGC::compute_new_size(); 2181
2182 // We reclaimed old regions so we should calculate the sizes to make 2183 // sure we update the old gen/space data. 2184 g1h->g1mm()->update_sizes(); 2185
2186 g1h->trace_heap_after_concurrent_cycle(); 2187 } 2188
2189 void ConcurrentMark::completeCleanup() { 2190 if (has_aborted()) return; 2191
2192 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2193
2194 _cleanup_list.verify_optional(); 2195 FreeRegionList tmp_free_list("Tmp Free List"); 2196
2197 if (G1ConcRegionFreeingVerbose) { 2198 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2199 "cleanup list has %u entries", 2200 _cleanup_list.length()); 2201 } 2202
2203 // No one else should be accessing the _cleanup_list at this point, 2204 // so it is not necessary to take any locks 2205 while (!_cleanup_list.is_empty()) { 2206 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 2207 assert(hr != NULL, "Got NULL from a non-empty list"); 2208 hr->par_clear(); 2209 tmp_free_list.add_ordered(hr); 2210
2211 // Instead of adding one region at a time to the secondary_free_list, 2212 // we accumulate them in the local list and move them a few at a 2213 // time. This also cuts down on the number of notify_all() calls 2214 // we do during this process. We'll also append the local list when 2215 // _cleanup_list is empty (which means we just removed the last 2216 // region from the _cleanup_list). 2217 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2218 _cleanup_list.is_empty()) { 2219 if (G1ConcRegionFreeingVerbose) { 2220 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2221 "appending %u entries to the secondary_free_list, " 2222 "cleanup list still has %u entries", 2223 tmp_free_list.length(), 2224 _cleanup_list.length()); 2225 } 2226
2227 { 2228 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2229 g1h->secondary_free_list_add(&tmp_free_list); 2230 SecondaryFreeList_lock->notify_all(); 2231 } 2232
2233 if (G1StressConcRegionFreeing) { 2234 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2235 os::sleep(Thread::current(), (jlong) 1, false); 2236 } 2237 } 2238 } 2239 } 2240 assert(tmp_free_list.is_empty(), "post-condition"); 2241 } 2242
2243 // Supporting Object and Oop closures for reference discovery 2244 // and processing during marking 2245
2246 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2247 HeapWord* addr = (HeapWord*)obj; 2248 return addr != NULL && 2249 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2250 } 2251
2252 // 'Keep Alive' oop closure used by both serial and parallel reference processing. 2253 // Uses the CMTask associated with a worker thread (for serial reference 2254 // processing the CMTask for worker 0 is used) to preserve (mark) and 2255 // trace referent objects.
2256 // 2257 // Using the CMTask and embedded local queues avoids having the worker 2258 // threads operating on the global mark stack. This reduces the risk 2259 // of overflowing the stack - which we would rather avoid at this late 2260 // stage. Also, using the tasks' local queues removes the potential 2261 // interference between workers that could occur if they were 2262 // operating on the global stack. 2263
2264 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2265 ConcurrentMark* _cm; 2266 CMTask* _task; 2267 int _ref_counter_limit; 2268 int _ref_counter; 2269 bool _is_serial; 2270 public: 2271 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2272 _cm(cm), _task(task), _is_serial(is_serial), 2273 _ref_counter_limit(G1RefProcDrainInterval) { 2274 assert(_ref_counter_limit > 0, "sanity"); 2275 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2276 _ref_counter = _ref_counter_limit; 2277 } 2278
2279 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2280 virtual void do_oop( oop* p) { do_oop_work(p); } 2281
2282 template <class T> void do_oop_work(T* p) { 2283 if (!_cm->has_overflown()) { 2284 oop obj = oopDesc::load_decode_heap_oop(p); 2285 if (_cm->verbose_high()) { 2286 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2287 "*"PTR_FORMAT" = "PTR_FORMAT, 2288 _task->worker_id(), p2i(p), p2i((void*) obj)); 2289 } 2290
2291 _task->deal_with_reference(obj); 2292 _ref_counter--; 2293
2294 if (_ref_counter == 0) { 2295 // We have dealt with _ref_counter_limit references, pushing them 2296 // and objects reachable from them on to the local stack (and 2297 // possibly the global stack). Call CMTask::do_marking_step() to 2298 // process these entries. 2299 // 2300 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2301 // there's nothing more to do (i.e. we're done with the entries that 2302 // were pushed as a result of the CMTask::deal_with_reference() calls 2303 // above) or we overflow. 2304 // 2305 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2306 // flag while there may still be some work to do. (See the comment at 2307 // the beginning of CMTask::do_marking_step() for those conditions - 2308 // one of which is reaching the specified time target.) It is only 2309 // when CMTask::do_marking_step() returns without setting the 2310 // has_aborted() flag that the marking step has completed. 2311 do { 2312 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2313 _task->do_marking_step(mark_step_duration_ms, 2314 false /* do_termination */, 2315 _is_serial); 2316 } while (_task->has_aborted() && !_cm->has_overflown()); 2317 _ref_counter = _ref_counter_limit; 2318 } 2319 } else { 2320 if (_cm->verbose_high()) { 2321 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2322 } 2323 } 2324 } 2325 }; 2326
2327 // 'Drain' oop closure used by both serial and parallel reference processing. 2328 // Uses the CMTask associated with a given worker thread (for serial 2329 // reference processing the CMTask for worker 0 is used). Calls the 2330 // do_marking_step routine, with an unbelievably large timeout value, 2331 // to drain the marking data structures of the remaining entries 2332 // added by the 'keep alive' oop closure above.
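// During reference processing the 'keep alive' and 'drain' closures are
// applied as a pair, roughly as follows (a sketch of the caller's pattern
// inside the reference processor, not a verbatim quote of it):
#if 0
  keep_alive->do_oop(referent_addr); // mark the referent, push newly reachable objects
  complete_gc->do_void();            // drain whatever the keep-alive pass pushed
#endif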
2333 2334 class G1CMDrainMarkingStackClosure: public VoidClosure { 2335 ConcurrentMark* _cm; 2336 CMTask* _task; 2337 bool _is_serial; 2338 public: 2339 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2340 _cm(cm), _task(task), _is_serial(is_serial) { 2341 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2342 } 2343
2344 void do_void() { 2345 do { 2346 if (_cm->verbose_high()) { 2347 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s", 2348 _task->worker_id(), BOOL_TO_STR(_is_serial)); 2349 } 2350
2351 // We call CMTask::do_marking_step() to completely drain the local 2352 // and global marking stacks of entries pushed by the 'keep alive' 2353 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2354 // 2355 // CMTask::do_marking_step() is called in a loop, which we'll exit 2356 // if there's nothing more to do (i.e. we've completely drained the 2357 // entries that were pushed as a result of applying the 'keep alive' 2358 // closure to the entries on the discovered ref lists) or we overflow 2359 // the global marking stack. 2360 // 2361 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2362 // flag while there may still be some work to do. (See the comment at 2363 // the beginning of CMTask::do_marking_step() for those conditions - 2364 // one of which is reaching the specified time target.) It is only 2365 // when CMTask::do_marking_step() returns without setting the 2366 // has_aborted() flag that the marking step has completed. 2367
2368 _task->do_marking_step(1000000000.0 /* something very large */, 2369 true /* do_termination */, 2370 _is_serial); 2371 } while (_task->has_aborted() && !_cm->has_overflown()); 2372 } 2373 }; 2374
2375 // Implementation of AbstractRefProcTaskExecutor for parallel 2376 // reference processing at the end of G1 concurrent marking 2377
2378 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2379 private: 2380 G1CollectedHeap* _g1h; 2381 ConcurrentMark* _cm; 2382 WorkGang* _workers; 2383 int _active_workers; 2384
2385 public: 2386 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2387 ConcurrentMark* cm, 2388 WorkGang* workers, 2389 int n_workers) : 2390 _g1h(g1h), _cm(cm), 2391 _workers(workers), _active_workers(n_workers) { } 2392
2393 // Executes the given task using concurrent marking worker threads.
2394 virtual void execute(ProcessTask& task); 2395 virtual void execute(EnqueueTask& task); 2396 }; 2397 2398 class G1CMRefProcTaskProxy: public AbstractGangTask { 2399 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2400 ProcessTask& _proc_task; 2401 G1CollectedHeap* _g1h; 2402 ConcurrentMark* _cm; 2403 2404 public: 2405 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2406 G1CollectedHeap* g1h, 2407 ConcurrentMark* cm) : 2408 AbstractGangTask("Process reference objects in parallel"), 2409 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2410 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2411 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2412 } 2413 2414 virtual void work(uint worker_id) { 2415 ResourceMark rm; 2416 HandleMark hm; 2417 CMTask* task = _cm->task(worker_id); 2418 G1CMIsAliveClosure g1_is_alive(_g1h); 2419 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2420 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2421 2422 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2423 } 2424 }; 2425 2426 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2427 assert(_workers != NULL, "Need parallel worker threads."); 2428 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2429 2430 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2431 2432 // We need to reset the concurrency level before each 2433 // proxy task execution, so that the termination protocol 2434 // and overflow handling in CMTask::do_marking_step() knows 2435 // how many workers to wait for. 2436 _cm->set_concurrency(_active_workers); 2437 _g1h->set_par_threads(_active_workers); 2438 _workers->run_task(&proc_task_proxy); 2439 _g1h->set_par_threads(0); 2440 } 2441 2442 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2443 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2444 EnqueueTask& _enq_task; 2445 2446 public: 2447 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2448 AbstractGangTask("Enqueue reference objects in parallel"), 2449 _enq_task(enq_task) { } 2450 2451 virtual void work(uint worker_id) { 2452 _enq_task.work(worker_id); 2453 } 2454 }; 2455 2456 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2457 assert(_workers != NULL, "Need parallel worker threads."); 2458 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2459 2460 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2461 2462 // Not strictly necessary but... 2463 // 2464 // We need to reset the concurrency level before each 2465 // proxy task execution, so that the termination protocol 2466 // and overflow handling in CMTask::do_marking_step() knows 2467 // how many workers to wait for. 2468 _cm->set_concurrency(_active_workers); 2469 _g1h->set_par_threads(_active_workers); 2470 _workers->run_task(&enq_task_proxy); 2471 _g1h->set_par_threads(0); 2472 } 2473 2474 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2475 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2476 } 2477 2478 // Helper class to get rid of some boilerplate code. 
2479 class G1RemarkGCTraceTime : public GCTraceTime { 2480 static bool doit_and_prepend(bool doit) { 2481 if (doit) { 2482 gclog_or_tty->put(' '); 2483 } 2484 return doit; 2485 } 2486
2487 public: 2488 G1RemarkGCTraceTime(const char* title, bool doit) 2489 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(), 2490 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) { 2491 } 2492 }; 2493
2494 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2495 if (has_overflown()) { 2496 // Skip processing the discovered references if we have 2497 // overflown the global marking stack. Reference objects 2498 // only get discovered once so it is OK to not 2499 // de-populate the discovered reference lists. We could have, 2500 // but the only benefit would be that, when marking restarts, 2501 // fewer reference objects are discovered. 2502 return; 2503 } 2504
2505 ResourceMark rm; 2506 HandleMark hm; 2507
2508 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2509
2510 // Is alive closure. 2511 G1CMIsAliveClosure g1_is_alive(g1h); 2512
2513 // Inner scope to exclude the cleaning of the string and symbol 2514 // tables from the displayed time. 2515 { 2516 if (G1Log::finer()) { 2517 gclog_or_tty->put(' '); 2518 } 2519 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id()); 2520
2521 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2522
2523 // See the comment in G1CollectedHeap::ref_processing_init() 2524 // about how reference processing currently works in G1. 2525
2526 // Set the soft reference policy 2527 rp->setup_policy(clear_all_soft_refs); 2528 assert(_markStack.isEmpty(), "mark stack should be empty"); 2529
2530 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2531 // in serial reference processing. Note these closures are also 2532 // used for serially processing (by the current thread) the 2533 // JNI references during parallel reference processing. 2534 // 2535 // These closures do not need to synchronize with the worker 2536 // threads involved in parallel reference processing as these 2537 // instances are executed serially by the current thread (e.g. 2538 // reference processing is not multi-threaded and is thus 2539 // performed by the current thread instead of a gang worker). 2540 // 2541 // The gang tasks involved in parallel reference processing create 2542 // their own instances of these closures, which do their own 2543 // synchronization among themselves. 2544 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2545 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2546
2547 // We need at least one active thread. If reference processing 2548 // is not multi-threaded we use the current (VMThread) thread, 2549 // otherwise we use the work gang from the G1CollectedHeap and 2550 // we utilize all the worker threads we can. 2551 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL; 2552 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2553 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2554
2555 // Parallel processing task executor. 2556 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2557 g1h->workers(), active_workers); 2558 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2559
2560 // Set the concurrency level. The phase was already set prior to 2561 // executing the remark task.
2562 set_concurrency(active_workers); 2563 2564 // Set the degree of MT processing here. If the discovery was done MT, 2565 // the number of threads involved during discovery could differ from 2566 // the number of active workers. This is OK as long as the discovered 2567 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2568 rp->set_active_mt_degree(active_workers); 2569 2570 // Process the weak references. 2571 const ReferenceProcessorStats& stats = 2572 rp->process_discovered_references(&g1_is_alive, 2573 &g1_keep_alive, 2574 &g1_drain_mark_stack, 2575 executor, 2576 g1h->gc_timer_cm(), 2577 concurrent_gc_id()); 2578 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2579 2580 // The do_oop work routines of the keep_alive and drain_marking_stack 2581 // oop closures will set the has_overflown flag if we overflow the 2582 // global marking stack. 2583 2584 assert(_markStack.overflow() || _markStack.isEmpty(), 2585 "mark stack should be empty (unless it overflowed)"); 2586 2587 if (_markStack.overflow()) { 2588 // This should have been done already when we tried to push an 2589 // entry on to the global mark stack. But let's do it again. 2590 set_has_overflown(); 2591 } 2592 2593 assert(rp->num_q() == active_workers, "why not"); 2594 2595 rp->enqueue_discovered_references(executor); 2596 2597 rp->verify_no_references_recorded(); 2598 assert(!rp->discovery_enabled(), "Post condition"); 2599 } 2600 2601 if (has_overflown()) { 2602 // We can not trust g1_is_alive if the marking stack overflowed 2603 return; 2604 } 2605 2606 assert(_markStack.isEmpty(), "Marking should have completed"); 2607 2608 // Unload Klasses, String, Symbols, Code Cache, etc. 2609 { 2610 G1RemarkGCTraceTime trace("Unloading", G1Log::finer()); 2611 2612 if (ClassUnloadingWithConcurrentMark) { 2613 bool purged_classes; 2614 2615 { 2616 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2617 purged_classes = SystemDictionary::do_unloading(&g1_is_alive); 2618 } 2619 2620 { 2621 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest()); 2622 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2623 } 2624 } 2625 2626 if (G1StringDedup::is_enabled()) { 2627 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2628 G1StringDedup::unlink(&g1_is_alive); 2629 } 2630 } 2631 } 2632 2633 void ConcurrentMark::swapMarkBitMaps() { 2634 CMBitMapRO* temp = _prevMarkBitMap; 2635 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2636 _nextMarkBitMap = (CMBitMap*) temp; 2637 } 2638 2639 class CMObjectClosure; 2640 2641 // Closure for iterating over objects, currently only used for 2642 // processing SATB buffers. 
2643 class CMObjectClosure : public ObjectClosure { 2644 private: 2645 CMTask* _task; 2646
2647 public: 2648 void do_object(oop obj) { 2649 _task->deal_with_reference(obj); 2650 } 2651
2652 CMObjectClosure(CMTask* task) : _task(task) { } 2653 }; 2654
2655 class G1RemarkThreadsClosure : public ThreadClosure { 2656 CMObjectClosure _cm_obj; 2657 G1CMOopClosure _cm_cl; 2658 MarkingCodeBlobClosure _code_cl; 2659 int _thread_parity; 2660 bool _is_par; 2661
2662 public: 2663 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) : 2664 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 2665 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {} 2666
2667 void do_thread(Thread* thread) { 2668 if (thread->is_Java_thread()) { 2669 if (thread->claim_oops_do(_is_par, _thread_parity)) { 2670 JavaThread* jt = (JavaThread*)thread; 2671
2672 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 2673 // however, oops reachable from nmethods have very complex lifecycles: 2674 // * Alive if on the stack of an executing method 2675 // * Weakly reachable otherwise 2676 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 2677 // live by the SATB invariant, but other oops recorded in nmethods may behave differently. 2678 jt->nmethods_do(&_code_cl); 2679
2680 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj); 2681 } 2682 } else if (thread->is_VM_thread()) { 2683 if (thread->claim_oops_do(_is_par, _thread_parity)) { 2684 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj); 2685 } 2686 } 2687 } 2688 }; 2689
2690 class CMRemarkTask: public AbstractGangTask { 2691 private: 2692 ConcurrentMark* _cm; 2693 bool _is_serial; 2694 public: 2695 void work(uint worker_id) { 2696 // Since all available tasks are actually started, we should 2697 // only proceed if we're supposed to be active. 2698 if (worker_id < _cm->active_tasks()) { 2699 CMTask* task = _cm->task(worker_id); 2700 task->record_start_time(); 2701 { 2702 ResourceMark rm; 2703 HandleMark hm; 2704
2705 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial); 2706 Threads::threads_do(&threads_f); 2707 } 2708
2709 do { 2710 task->do_marking_step(1000000000.0 /* something very large */, 2711 true /* do_termination */, 2712 _is_serial); 2713 } while (task->has_aborted() && !_cm->has_overflown()); 2714 // If we overflow, then we do not want to restart. We instead 2715 // want to abort remark and do concurrent marking again.
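// To summarize the exit conditions of the loop above: !has_aborted() means
// the marking step ran to completion; has_aborted() with an overflow means
// we give up here and a new concurrent marking cycle will redo the work;
// has_aborted() without an overflow simply retries the step.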
2716 task->record_end_time(); 2717 } 2718 } 2719
2720 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) : 2721 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) { 2722 _cm->terminator()->reset_for_reuse(active_workers); 2723 } 2724 }; 2725
2726 void ConcurrentMark::checkpointRootsFinalWork() { 2727 ResourceMark rm; 2728 HandleMark hm; 2729 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2730
2731 G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer()); 2732
2733 g1h->ensure_parsability(false); 2734
2735 if (G1CollectedHeap::use_parallel_gc_threads()) { 2736 G1CollectedHeap::StrongRootsScope srs(g1h); 2737 // this is remark, so we'll use up all active threads 2738 uint active_workers = g1h->workers()->active_workers(); 2739 if (active_workers == 0) { 2740 assert(active_workers > 0, "Should have been set earlier"); 2741 active_workers = (uint) ParallelGCThreads; 2742 g1h->workers()->set_active_workers(active_workers); 2743 } 2744 set_concurrency_and_phase(active_workers, false /* concurrent */); 2745 // Leave _parallel_marking_threads at its 2746 // value originally calculated in the ConcurrentMark 2747 // constructor and pass values of the active workers 2748 // through the gang in the task. 2749
2750 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */); 2751 // We will start all available threads, even if we decide that the 2752 // active_workers will be fewer. The extra ones will just bail out 2753 // immediately. 2754 g1h->set_par_threads(active_workers); 2755 g1h->workers()->run_task(&remarkTask); 2756 g1h->set_par_threads(0); 2757 } else { 2758 G1CollectedHeap::StrongRootsScope srs(g1h); 2759 uint active_workers = 1; 2760 set_concurrency_and_phase(active_workers, false /* concurrent */); 2761
2762 // Note - if there's no work gang then the VMThread will be 2763 // the thread to execute the remark - serially. We have 2764 // to pass true for the is_serial parameter so that 2765 // CMTask::do_marking_step() doesn't enter the sync 2766 // barriers in the event of an overflow. Doing so will 2767 // cause an assert that the current thread is not a 2768 // concurrent GC thread.
2769 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2770 remarkTask.work(0); 2771 } 2772 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2773 guarantee(has_overflown() || 2774 satb_mq_set.completed_buffers_num() == 0, 2775 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2776 BOOL_TO_STR(has_overflown()), 2777 satb_mq_set.completed_buffers_num())); 2778 2779 print_stats(); 2780 } 2781 2782 #ifndef PRODUCT 2783 2784 class PrintReachableOopClosure: public OopClosure { 2785 private: 2786 G1CollectedHeap* _g1h; 2787 outputStream* _out; 2788 VerifyOption _vo; 2789 bool _all; 2790 2791 public: 2792 PrintReachableOopClosure(outputStream* out, 2793 VerifyOption vo, 2794 bool all) : 2795 _g1h(G1CollectedHeap::heap()), 2796 _out(out), _vo(vo), _all(all) { } 2797 2798 void do_oop(narrowOop* p) { do_oop_work(p); } 2799 void do_oop( oop* p) { do_oop_work(p); } 2800 2801 template <class T> void do_oop_work(T* p) { 2802 oop obj = oopDesc::load_decode_heap_oop(p); 2803 const char* str = NULL; 2804 const char* str2 = ""; 2805 2806 if (obj == NULL) { 2807 str = ""; 2808 } else if (!_g1h->is_in_g1_reserved(obj)) { 2809 str = " O"; 2810 } else { 2811 HeapRegion* hr = _g1h->heap_region_containing(obj); 2812 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2813 bool marked = _g1h->is_marked(obj, _vo); 2814 2815 if (over_tams) { 2816 str = " >"; 2817 if (marked) { 2818 str2 = " AND MARKED"; 2819 } 2820 } else if (marked) { 2821 str = " M"; 2822 } else { 2823 str = " NOT"; 2824 } 2825 } 2826 2827 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2828 p2i(p), p2i((void*) obj), str, str2); 2829 } 2830 }; 2831 2832 class PrintReachableObjectClosure : public ObjectClosure { 2833 private: 2834 G1CollectedHeap* _g1h; 2835 outputStream* _out; 2836 VerifyOption _vo; 2837 bool _all; 2838 HeapRegion* _hr; 2839 2840 public: 2841 PrintReachableObjectClosure(outputStream* out, 2842 VerifyOption vo, 2843 bool all, 2844 HeapRegion* hr) : 2845 _g1h(G1CollectedHeap::heap()), 2846 _out(out), _vo(vo), _all(all), _hr(hr) { } 2847 2848 void do_object(oop o) { 2849 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2850 bool marked = _g1h->is_marked(o, _vo); 2851 bool print_it = _all || over_tams || marked; 2852 2853 if (print_it) { 2854 _out->print_cr(" "PTR_FORMAT"%s", 2855 p2i((void *)o), (over_tams) ? " >" : (marked) ? 
" M" : ""); 2856 PrintReachableOopClosure oopCl(_out, _vo, _all); 2857 o->oop_iterate_no_header(&oopCl); 2858 } 2859 } 2860 }; 2861 2862 class PrintReachableRegionClosure : public HeapRegionClosure { 2863 private: 2864 G1CollectedHeap* _g1h; 2865 outputStream* _out; 2866 VerifyOption _vo; 2867 bool _all; 2868 2869 public: 2870 bool doHeapRegion(HeapRegion* hr) { 2871 HeapWord* b = hr->bottom(); 2872 HeapWord* e = hr->end(); 2873 HeapWord* t = hr->top(); 2874 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2875 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2876 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p)); 2877 _out->cr(); 2878 2879 HeapWord* from = b; 2880 HeapWord* to = t; 2881 2882 if (to > from) { 2883 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to)); 2884 _out->cr(); 2885 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2886 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2887 _out->cr(); 2888 } 2889 2890 return false; 2891 } 2892 2893 PrintReachableRegionClosure(outputStream* out, 2894 VerifyOption vo, 2895 bool all) : 2896 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2897 }; 2898 2899 void ConcurrentMark::print_reachable(const char* str, 2900 VerifyOption vo, 2901 bool all) { 2902 gclog_or_tty->cr(); 2903 gclog_or_tty->print_cr("== Doing heap dump... "); 2904 2905 if (G1PrintReachableBaseFile == NULL) { 2906 gclog_or_tty->print_cr(" #### error: no base file defined"); 2907 return; 2908 } 2909 2910 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2911 (JVM_MAXPATHLEN - 1)) { 2912 gclog_or_tty->print_cr(" #### error: file name too long"); 2913 return; 2914 } 2915 2916 char file_name[JVM_MAXPATHLEN]; 2917 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2918 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2919 2920 fileStream fout(file_name); 2921 if (!fout.is_open()) { 2922 gclog_or_tty->print_cr(" #### error: could not open file"); 2923 return; 2924 } 2925 2926 outputStream* out = &fout; 2927 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2928 out->cr(); 2929 2930 out->print_cr("--- ITERATING OVER REGIONS"); 2931 out->cr(); 2932 PrintReachableRegionClosure rcl(out, vo, all); 2933 _g1h->heap_region_iterate(&rcl); 2934 out->cr(); 2935 2936 gclog_or_tty->print_cr(" done"); 2937 gclog_or_tty->flush(); 2938 } 2939 2940 #endif // PRODUCT 2941 2942 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2943 // Note we are overriding the read-only view of the prev map here, via 2944 // the cast. 2945 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2946 } 2947 2948 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2949 _nextMarkBitMap->clearRange(mr); 2950 } 2951 2952 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2953 clearRangePrevBitmap(mr); 2954 clearRangeNextBitmap(mr); 2955 } 2956 2957 HeapRegion* 2958 ConcurrentMark::claim_region(uint worker_id) { 2959 // "checkpoint" the finger 2960 HeapWord* finger = _finger; 2961 2962 // _heap_end will not change underneath our feet; it only changes at 2963 // yield points. 2964 while (finger < _heap_end) { 2965 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2966 2967 // Note on how this code handles humongous regions. In the 2968 // normal case the finger will reach the start of a "starts 2969 // humongous" (SH) region. 
Its end will either be the end of the 2970 // last "continues humongous" (CH) region in the sequence, or the 2971 // standard end of the SH region (if the SH is the only region in 2972 // the sequence). That way claim_region() will skip over the CH 2973 // regions. However, there is a subtle race between a CM thread 2974 // executing this method and a mutator thread doing a humongous 2975 // object allocation. The two are not mutually exclusive as the CM 2976 // thread does not need to hold the Heap_lock when it gets 2977 // here. So there is a chance that claim_region() will come across 2978 // a free region that's in the process of becoming a SH or a CH 2979 // region. In the former case, it will either 2980 // a) Miss the update to the region's end, in which case it will 2981 // visit every subsequent CH region, will find their bitmaps 2982 // empty, and do nothing, or 2983 // b) Will observe the update of the region's end (in which case 2984 // it will skip the subsequent CH regions). 2985 // If it comes across a region that suddenly becomes CH, the 2986 // scenario will be similar to b). So, the race between 2987 // claim_region() and a humongous object allocation might force us 2988 // to do a bit of unnecessary work (due to some unnecessary bitmap 2989 // iterations) but it should not introduce any correctness issues. 2990 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2991
2992 // Above heap_region_containing_raw may return NULL as we always scan and claim 2993 // until the end of the heap. In this case, just jump to the next region. 2994 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 2995
2996 // Is the gap between reading the finger and doing the CAS too long? 2997 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2998 if (res == finger && curr_region != NULL) { 2999 // we succeeded 3000 HeapWord* bottom = curr_region->bottom(); 3001 HeapWord* limit = curr_region->next_top_at_mark_start(); 3002
3003 if (verbose_low()) { 3004 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" " 3005 "["PTR_FORMAT", "PTR_FORMAT"), " 3006 "limit = "PTR_FORMAT, 3007 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit)); 3008 } 3009
3010 // notice that _finger == end cannot be guaranteed here since 3011 // someone else might have moved the finger even further 3012 assert(_finger >= end, "the finger should have moved forward"); 3013
3014 if (verbose_low()) { 3015 gclog_or_tty->print_cr("[%u] we were successful with region = " 3016 PTR_FORMAT, worker_id, p2i(curr_region)); 3017 } 3018
3019 if (limit > bottom) { 3020 if (verbose_low()) { 3021 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, " 3022 "returning it ", worker_id, p2i(curr_region)); 3023 } 3024 return curr_region; 3025 } else { 3026 assert(limit == bottom, 3027 "the region limit should be at bottom"); 3028 if (verbose_low()) { 3029 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, " 3030 "returning NULL", worker_id, p2i(curr_region)); 3031 } 3032 // we return NULL and the caller should try calling 3033 // claim_region() again.
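// A sketch of the retry idiom this implies on the caller's side (the real
// loop lives in CMTask::do_marking_step(); names here are illustrative):
#if 0
  HeapRegion* hr = _cm->claim_region(_worker_id);
  while (hr == NULL && !_cm->out_of_regions()) {
    hr = _cm->claim_region(_worker_id); // NULL: empty region, or we lost the CAS race
  }
#endif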
3034 return NULL; 3035 } 3036 } else { 3037 assert(_finger > finger, "the finger should have moved forward"); 3038 if (verbose_low()) { 3039 if (curr_region == NULL) { 3040 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, " 3041 "global finger = "PTR_FORMAT", " 3042 "our finger = "PTR_FORMAT, 3043 worker_id, p2i(_finger), p2i(finger)); 3044 } else { 3045 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 3046 "global finger = "PTR_FORMAT", " 3047 "our finger = "PTR_FORMAT, 3048 worker_id, p2i(_finger), p2i(finger)); 3049 } 3050 } 3051 3052 // read it again 3053 finger = _finger; 3054 } 3055 } 3056 3057 return NULL; 3058 } 3059 3060 #ifndef PRODUCT 3061 enum VerifyNoCSetOopsPhase { 3062 VerifyNoCSetOopsStack, 3063 VerifyNoCSetOopsQueues, 3064 VerifyNoCSetOopsSATBCompleted, 3065 VerifyNoCSetOopsSATBThread 3066 }; 3067 3068 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 3069 private: 3070 G1CollectedHeap* _g1h; 3071 VerifyNoCSetOopsPhase _phase; 3072 int _info; 3073 3074 const char* phase_str() { 3075 switch (_phase) { 3076 case VerifyNoCSetOopsStack: return "Stack"; 3077 case VerifyNoCSetOopsQueues: return "Queue"; 3078 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 3079 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 3080 default: ShouldNotReachHere(); 3081 } 3082 return NULL; 3083 } 3084 3085 void do_object_work(oop obj) { 3086 guarantee(!_g1h->obj_in_cs(obj), 3087 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 3088 p2i((void*) obj), phase_str(), _info)); 3089 } 3090 3091 public: 3092 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 3093 3094 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 3095 _phase = phase; 3096 _info = info; 3097 } 3098 3099 virtual void do_oop(oop* p) { 3100 oop obj = oopDesc::load_decode_heap_oop(p); 3101 do_object_work(obj); 3102 } 3103 3104 virtual void do_oop(narrowOop* p) { 3105 // We should not come across narrow oops while scanning marking 3106 // stacks and SATB buffers. 
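// (The global mark stack and the task queues hold full-width oops, and SATB
// buffers record raw object addresses, so a narrowOop slot reaching this
// closure would indicate a caller error.)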
3107 ShouldNotReachHere(); 3108 } 3109 3110 virtual void do_object(oop obj) { 3111 do_object_work(obj); 3112 } 3113 }; 3114 3115 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 3116 bool verify_enqueued_buffers, 3117 bool verify_thread_buffers, 3118 bool verify_fingers) { 3119 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 3120 if (!G1CollectedHeap::heap()->mark_in_progress()) { 3121 return; 3122 } 3123 3124 VerifyNoCSetOopsClosure cl; 3125 3126 if (verify_stacks) { 3127 // Verify entries on the global mark stack 3128 cl.set_phase(VerifyNoCSetOopsStack); 3129 _markStack.oops_do(&cl); 3130 3131 // Verify entries on the task queues 3132 for (uint i = 0; i < _max_worker_id; i += 1) { 3133 cl.set_phase(VerifyNoCSetOopsQueues, i); 3134 CMTaskQueue* queue = _task_queues->queue(i); 3135 queue->oops_do(&cl); 3136 } 3137 } 3138 3139 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 3140 3141 // Verify entries on the enqueued SATB buffers 3142 if (verify_enqueued_buffers) { 3143 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 3144 satb_qs.iterate_completed_buffers_read_only(&cl); 3145 } 3146 3147 // Verify entries on the per-thread SATB buffers 3148 if (verify_thread_buffers) { 3149 cl.set_phase(VerifyNoCSetOopsSATBThread); 3150 satb_qs.iterate_thread_buffers_read_only(&cl); 3151 } 3152 3153 if (verify_fingers) { 3154 // Verify the global finger 3155 HeapWord* global_finger = finger(); 3156 if (global_finger != NULL && global_finger < _heap_end) { 3157 // The global finger always points to a heap region boundary. We 3158 // use heap_region_containing_raw() to get the containing region 3159 // given that the global finger could be pointing to a free region 3160 // which subsequently becomes continues humongous. If that 3161 // happens, heap_region_containing() will return the bottom of the 3162 // corresponding starts humongous region and the check below will 3163 // not hold any more. 3164 // Since we always iterate over all regions, we might get a NULL HeapRegion 3165 // here. 3166 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3167 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 3168 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3169 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 3170 } 3171 3172 // Verify the task fingers 3173 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3174 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3175 CMTask* task = _tasks[i]; 3176 HeapWord* task_finger = task->finger(); 3177 if (task_finger != NULL && task_finger < _heap_end) { 3178 // See above note on the global finger verification. 3179 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3180 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 3181 !task_hr->in_collection_set(), 3182 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3183 p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); 3184 } 3185 } 3186 } 3187 } 3188 #endif // PRODUCT 3189 3190 // Aggregate the counting data that was constructed concurrently 3191 // with marking. 
3192 class AggregateCountDataHRClosure: public HeapRegionClosure { 3193 G1CollectedHeap* _g1h; 3194 ConcurrentMark* _cm; 3195 CardTableModRefBS* _ct_bs; 3196 BitMap* _cm_card_bm; 3197 uint _max_worker_id; 3198
3199 public: 3200 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3201 BitMap* cm_card_bm, 3202 uint max_worker_id) : 3203 _g1h(g1h), _cm(g1h->concurrent_mark()), 3204 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3205 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3206
3207 bool doHeapRegion(HeapRegion* hr) { 3208 if (hr->continuesHumongous()) { 3209 // We will ignore these here and process them when their 3210 // associated "starts humongous" region is processed. 3211 // Note that we cannot rely on their associated 3212 // "starts humongous" region to have their bit set to 1 3213 // since, due to the region chunking in the parallel region 3214 // iteration, a "continues humongous" region might be visited 3215 // before its associated "starts humongous". 3216 return false; 3217 } 3218
3219 HeapWord* start = hr->bottom(); 3220 HeapWord* limit = hr->next_top_at_mark_start(); 3221 HeapWord* end = hr->end(); 3222
3223 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3224 err_msg("Preconditions not met - " 3225 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3226 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3227 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()))); 3228
3229 assert(hr->next_marked_bytes() == 0, "Precondition"); 3230
3231 if (start == limit) { 3232 // NTAMS of this region has not been set so nothing to do. 3233 return false; 3234 } 3235
3236 // 'start' should be in the heap. 3237 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 3238 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 3239 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 3240
3241 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 3242 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 3243 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 3244
3245 // If ntams is not card aligned then we bump the card bitmap index 3246 // for limit so that we get all the cards spanned by 3247 // the object ending at ntams. 3248 // Note: if this is the last region in the heap then ntams 3249 // could be actually just beyond the end of the heap; 3250 // limit_idx will then correspond to a (non-existent) card 3251 // that is also outside the heap. 3252 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 3253 limit_idx += 1; 3254 } 3255
3256 assert(limit_idx <= end_idx, "or else use atomics"); 3257
3258 // Aggregate the "stripe" in the count data associated with hr. 3259 uint hrm_index = hr->hrm_index(); 3260 size_t marked_bytes = 0; 3261
3262 for (uint i = 0; i < _max_worker_id; i += 1) { 3263 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 3264 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 3265
3266 // Fetch the marked_bytes in this region for task i and 3267 // add it to the running total for this region. 3268 marked_bytes += marked_bytes_array[hrm_index]; 3269
3270 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 3271 // into the global card bitmap.
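// Conceptually this is a range-restricted bitwise OR; the loop below visits
// only the set bits via get_next_one_offset() instead of every index. The
// dense equivalent, as a standalone sketch (illustrative only, not compiled;
// std::vector<bool> stands in for BitMap):
#if 0
  #include <cstddef>
  #include <vector>
  void or_range(const std::vector<bool>& task_bm, std::vector<bool>& global_bm,
                size_t start_idx, size_t limit_idx) {
    for (size_t i = start_idx; i < limit_idx; ++i) {
      if (task_bm[i]) {
        global_bm[i] = true; // OR the worker's bits into the global card bitmap
      }
    }
  }
#endif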
3272 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3273 3274 while (scan_idx < limit_idx) { 3275 assert(task_card_bm->at(scan_idx) == true, "should be"); 3276 _cm_card_bm->set_bit(scan_idx); 3277 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3278 3279 // BitMap::get_next_one_offset() can handle the case when 3280 // its left_offset parameter is greater than its right_offset 3281 // parameter. It does, however, have an early exit if 3282 // left_offset == right_offset. So let's limit the value 3283 // passed in for left offset here. 3284 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 3285 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 3286 } 3287 } 3288 3289 // Update the marked bytes for this region. 3290 hr->add_to_marked_bytes(marked_bytes); 3291 3292 // Next heap region 3293 return false; 3294 } 3295 }; 3296 3297 class G1AggregateCountDataTask: public AbstractGangTask { 3298 protected: 3299 G1CollectedHeap* _g1h; 3300 ConcurrentMark* _cm; 3301 BitMap* _cm_card_bm; 3302 uint _max_worker_id; 3303 int _active_workers; 3304 3305 public: 3306 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3307 ConcurrentMark* cm, 3308 BitMap* cm_card_bm, 3309 uint max_worker_id, 3310 int n_workers) : 3311 AbstractGangTask("Count Aggregation"), 3312 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3313 _max_worker_id(max_worker_id), 3314 _active_workers(n_workers) { } 3315 3316 void work(uint worker_id) { 3317 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3318 3319 if (G1CollectedHeap::use_parallel_gc_threads()) { 3320 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3321 _active_workers, 3322 HeapRegion::AggregateCountClaimValue); 3323 } else { 3324 _g1h->heap_region_iterate(&cl); 3325 } 3326 } 3327 }; 3328 3329 3330 void ConcurrentMark::aggregate_count_data() { 3331 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3332 _g1h->workers()->active_workers() : 3333 1); 3334 3335 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3336 _max_worker_id, n_workers); 3337 3338 if (G1CollectedHeap::use_parallel_gc_threads()) { 3339 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3340 "sanity check"); 3341 _g1h->set_par_threads(n_workers); 3342 _g1h->workers()->run_task(&g1_par_agg_task); 3343 _g1h->set_par_threads(0); 3344 3345 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3346 "sanity check"); 3347 _g1h->reset_heap_region_claim_values(); 3348 } else { 3349 g1_par_agg_task.work(0); 3350 } 3351 } 3352 3353 // Clear the per-worker arrays used to store the per-region counting data 3354 void ConcurrentMark::clear_all_count_data() { 3355 // Clear the global card bitmap - it will be filled during 3356 // liveness count aggregation (during remark) and the 3357 // final counting task. 3358 _card_bm.clear(); 3359 3360 // Clear the global region bitmap - it will be filled as part 3361 // of the final counting task. 
3362 _region_bm.clear(); 3363 3364 uint max_regions = _g1h->max_regions(); 3365 assert(_max_worker_id > 0, "uninitialized"); 3366 3367 for (uint i = 0; i < _max_worker_id; i += 1) { 3368 BitMap* task_card_bm = count_card_bitmap_for(i); 3369 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3370 3371 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3372 assert(marked_bytes_array != NULL, "uninitialized"); 3373 3374 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3375 task_card_bm->clear(); 3376 } 3377 } 3378 3379 void ConcurrentMark::print_stats() { 3380 if (verbose_stats()) { 3381 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3382 for (size_t i = 0; i < _active_tasks; ++i) { 3383 _tasks[i]->print_stats(); 3384 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3385 } 3386 } 3387 } 3388 3389 // abandon current marking iteration due to a Full GC 3390 void ConcurrentMark::abort() { 3391 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 3392 // concurrent bitmap clearing. 3393 _nextMarkBitMap->clearAll(); 3394 3395 // Note we cannot clear the previous marking bitmap here 3396 // since VerifyDuringGC verifies the objects marked during 3397 // a full GC against the previous bitmap. 3398 3399 // Clear the liveness counting data 3400 clear_all_count_data(); 3401 // Empty mark stack 3402 reset_marking_state(); 3403 for (uint i = 0; i < _max_worker_id; ++i) { 3404 _tasks[i]->clear_region_fields(); 3405 } 3406 _first_overflow_barrier_sync.abort(); 3407 _second_overflow_barrier_sync.abort(); 3408 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id(); 3409 if (!gc_id.is_undefined()) { 3410 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance 3411 // to detect that it was aborted. Only keep track of the first GC id that we aborted. 3412 _aborted_gc_id = gc_id; 3413 } 3414 _has_aborted = true; 3415 3416 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3417 satb_mq_set.abandon_partial_marking(); 3418 // This can be called either during or outside marking, we'll read 3419 // the expected_active value from the SATB queue set. 3420 satb_mq_set.set_active_all_threads( 3421 false, /* new active value */ 3422 satb_mq_set.is_active() /* expected_active */); 3423 3424 _g1h->trace_heap_after_concurrent_cycle(); 3425 _g1h->register_concurrent_cycle_end(); 3426 } 3427 3428 const GCId& ConcurrentMark::concurrent_gc_id() { 3429 if (has_aborted()) { 3430 return _aborted_gc_id; 3431 } 3432 return _g1h->gc_tracer_cm()->gc_id(); 3433 } 3434 3435 static void print_ms_time_info(const char* prefix, const char* name, 3436 NumberSeq& ns) { 3437 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3438 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3439 if (ns.num() > 0) { 3440 gclog_or_tty->print_cr("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 3441 prefix, ns.sd(), ns.maximum()); 3442 } 3443 } 3444 3445 void ConcurrentMark::print_summary_info() { 3446 gclog_or_tty->print_cr(" Concurrent marking:"); 3447 print_ms_time_info(" ", "init marks", _init_times); 3448 print_ms_time_info(" ", "remarks", _remark_times); 3449 { 3450 print_ms_time_info(" ", "final marks", _remark_mark_times); 3451 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3452 3453 } 3454 print_ms_time_info(" ", "cleanups", _cleanup_times); 3455 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3456 _total_counting_time, 3457 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3458 (double)_cleanup_times.num() 3459 : 0.0)); 3460 if (G1ScrubRemSets) { 3461 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3462 _total_rs_scrub_time, 3463 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3464 (double)_cleanup_times.num() 3465 : 0.0)); 3466 } 3467 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3468 (_init_times.sum() + _remark_times.sum() + 3469 _cleanup_times.sum())/1000.0); 3470 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3471 "(%8.2f s marking).", 3472 cmThread()->vtime_accum(), 3473 cmThread()->vtime_mark_accum()); 3474 } 3475 3476 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3477 if (use_parallel_marking_threads()) { 3478 _parallel_workers->print_worker_threads_on(st); 3479 } 3480 } 3481 3482 void ConcurrentMark::print_on_error(outputStream* st) const { 3483 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3484 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3485 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3486 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3487 } 3488 3489 // We take a break if someone is trying to stop the world. 
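// Workers poll this from their regular clock calls; if the suspendible
// thread set has been asked to synchronize (typically for a safepoint),
// the caller parks in yield() until the requested operation finishes and
// then returns true so that the caller can revalidate any state the
// pause may have invalidated.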
3490 bool ConcurrentMark::do_yield_check(uint worker_id) { 3491 if (SuspendibleThreadSet::should_yield()) { 3492 if (worker_id == 0) { 3493 _g1h->g1_policy()->record_concurrent_pause(); 3494 } 3495 SuspendibleThreadSet::yield(); 3496 return true; 3497 } else { 3498 return false; 3499 } 3500 } 3501 3502 bool ConcurrentMark::containing_card_is_marked(void* p) { 3503 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3504 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3505 } 3506 3507 bool ConcurrentMark::containing_cards_are_marked(void* start, 3508 void* last) { 3509 return containing_card_is_marked(start) && 3510 containing_card_is_marked(last); 3511 } 3512 3513 #ifndef PRODUCT 3514 // for debugging purposes 3515 void ConcurrentMark::print_finger() { 3516 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3517 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3518 for (uint i = 0; i < _max_worker_id; ++i) { 3519 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3520 } 3521 gclog_or_tty->cr(); 3522 } 3523 #endif 3524 3525 void CMTask::scan_object(oop obj) { 3526 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3527 3528 if (_cm->verbose_high()) { 3529 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3530 _worker_id, p2i((void*) obj)); 3531 } 3532 3533 size_t obj_size = obj->size(); 3534 _words_scanned += obj_size; 3535 3536 obj->oop_iterate(_cm_oop_closure); 3537 statsOnly( ++_objs_scanned ); 3538 check_limits(); 3539 } 3540 3541 // Closure for iteration over bitmaps 3542 class CMBitMapClosure : public BitMapClosure { 3543 private: 3544 // the bitmap that is being iterated over 3545 CMBitMap* _nextMarkBitMap; 3546 ConcurrentMark* _cm; 3547 CMTask* _task; 3548 3549 public: 3550 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3551 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3552 3553 bool do_bit(size_t offset) { 3554 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3555 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3556 assert( addr < _cm->finger(), "invariant"); 3557 3558 statsOnly( _task->increase_objs_found_on_bitmap() ); 3559 assert(addr >= _task->finger(), "invariant"); 3560 3561 // We move that task's local finger along. 
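    // Advancing the finger before scanning means that if the task aborts
    // part-way through and do_marking_step() is re-invoked, the bitmap
    // iteration resumes at (or just past) this object rather than
    // re-walking the part of the region already covered.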
3562 _task->move_finger_to(addr); 3563 3564 _task->scan_object(oop(addr)); 3565 // we only partially drain the local queue and global stack 3566 _task->drain_local_queue(true); 3567 _task->drain_global_stack(true); 3568 3569 // if the has_aborted flag has been raised, we need to bail out of 3570 // the iteration 3571 return !_task->has_aborted(); 3572 } 3573 }; 3574 3575 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3576 ConcurrentMark* cm, 3577 CMTask* task) 3578 : _g1h(g1h), _cm(cm), _task(task) { 3579 assert(_ref_processor == NULL, "should be initialized to NULL"); 3580 3581 if (G1UseConcMarkReferenceProcessing) { 3582 _ref_processor = g1h->ref_processor_cm(); 3583 assert(_ref_processor != NULL, "should not be NULL"); 3584 } 3585 } 3586 3587 void CMTask::setup_for_region(HeapRegion* hr) { 3588 assert(hr != NULL, 3589 "claim_region() should have filtered out NULL regions"); 3590 assert(!hr->continuesHumongous(), 3591 "claim_region() should have filtered out continues humongous regions"); 3592 3593 if (_cm->verbose_low()) { 3594 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT, 3595 _worker_id, p2i(hr)); 3596 } 3597 3598 _curr_region = hr; 3599 _finger = hr->bottom(); 3600 update_region_limit(); 3601 } 3602 3603 void CMTask::update_region_limit() { 3604 HeapRegion* hr = _curr_region; 3605 HeapWord* bottom = hr->bottom(); 3606 HeapWord* limit = hr->next_top_at_mark_start(); 3607 3608 if (limit == bottom) { 3609 if (_cm->verbose_low()) { 3610 gclog_or_tty->print_cr("[%u] found an empty region " 3611 "["PTR_FORMAT", "PTR_FORMAT")", 3612 _worker_id, p2i(bottom), p2i(limit)); 3613 } 3614 // The region was collected underneath our feet. 3615 // We set the finger to bottom to ensure that the bitmap 3616 // iteration that will follow this will not do anything. 3617 // (this is not a condition that holds when we set the region up, 3618 // as the region is not supposed to be empty in the first place) 3619 _finger = bottom; 3620 } else if (limit >= _region_limit) { 3621 assert(limit >= _finger, "peace of mind"); 3622 } else { 3623 assert(limit < _region_limit, "only way to get here"); 3624 // This can happen under some pretty unusual circumstances. An 3625 // evacuation pause empties the region underneath our feet (NTAMS 3626 // at bottom). We then do some allocation in the region (NTAMS 3627 // stays at bottom), followed by the region being used as a GC 3628 // alloc region (NTAMS will move to top() and the objects 3629 // originally below it will be grayed). All objects now marked in 3630 // the region are explicitly grayed, if below the global finger, 3631 // and we do not need in fact to scan anything else. So, we simply 3632 // set _finger to be limit to ensure that the bitmap iteration 3633 // doesn't do anything. 3634 _finger = limit; 3635 } 3636 3637 _region_limit = limit; 3638 } 3639 3640 void CMTask::giveup_current_region() { 3641 assert(_curr_region != NULL, "invariant"); 3642 if (_cm->verbose_low()) { 3643 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3644 _worker_id, p2i(_curr_region)); 3645 } 3646 clear_region_fields(); 3647 } 3648 3649 void CMTask::clear_region_fields() { 3650 // Values for these three fields that indicate that we're not 3651 // holding on to a region. 
3652 _curr_region = NULL; 3653 _finger = NULL; 3654 _region_limit = NULL; 3655 } 3656 3657 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3658 if (cm_oop_closure == NULL) { 3659 assert(_cm_oop_closure != NULL, "invariant"); 3660 } else { 3661 assert(_cm_oop_closure == NULL, "invariant"); 3662 } 3663 _cm_oop_closure = cm_oop_closure; 3664 } 3665 3666 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3667 guarantee(nextMarkBitMap != NULL, "invariant"); 3668 3669 if (_cm->verbose_low()) { 3670 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3671 } 3672 3673 _nextMarkBitMap = nextMarkBitMap; 3674 clear_region_fields(); 3675 3676 _calls = 0; 3677 _elapsed_time_ms = 0.0; 3678 _termination_time_ms = 0.0; 3679 _termination_start_time_ms = 0.0; 3680 3681 #if _MARKING_STATS_ 3682 _local_pushes = 0; 3683 _local_pops = 0; 3684 _local_max_size = 0; 3685 _objs_scanned = 0; 3686 _global_pushes = 0; 3687 _global_pops = 0; 3688 _global_max_size = 0; 3689 _global_transfers_to = 0; 3690 _global_transfers_from = 0; 3691 _regions_claimed = 0; 3692 _objs_found_on_bitmap = 0; 3693 _satb_buffers_processed = 0; 3694 _steal_attempts = 0; 3695 _steals = 0; 3696 _aborted = 0; 3697 _aborted_overflow = 0; 3698 _aborted_cm_aborted = 0; 3699 _aborted_yield = 0; 3700 _aborted_timed_out = 0; 3701 _aborted_satb = 0; 3702 _aborted_termination = 0; 3703 #endif // _MARKING_STATS_ 3704 } 3705 3706 bool CMTask::should_exit_termination() { 3707 regular_clock_call(); 3708 // This is called when we are in the termination protocol. We should 3709 // quit if, for some reason, this task wants to abort or the global 3710 // stack is not empty (this means that we can get work from it). 3711 return !_cm->mark_stack_empty() || has_aborted(); 3712 } 3713 3714 void CMTask::reached_limit() { 3715 assert(_words_scanned >= _words_scanned_limit || 3716 _refs_reached >= _refs_reached_limit , 3717 "shouldn't have been called otherwise"); 3718 regular_clock_call(); 3719 } 3720 3721 void CMTask::regular_clock_call() { 3722 if (has_aborted()) return; 3723 3724 // First, we need to recalculate the words scanned and refs reached 3725 // limits for the next clock call. 3726 recalculate_limits(); 3727 3728 // During the regular clock call we do the following 3729 3730 // (1) If an overflow has been flagged, then we abort. 3731 if (_cm->has_overflown()) { 3732 set_has_aborted(); 3733 return; 3734 } 3735 3736 // If we are not concurrent (i.e. we're doing remark) we don't need 3737 // to check anything else. The other steps are only needed during 3738 // the concurrent marking phase. 3739 if (!concurrent()) return; 3740 3741 // (2) If marking has been aborted for Full GC, then we also abort. 3742 if (_cm->has_aborted()) { 3743 set_has_aborted(); 3744 statsOnly( ++_aborted_cm_aborted ); 3745 return; 3746 } 3747 3748 double curr_time_ms = os::elapsedVTime() * 1000.0; 3749 3750 // (3) If marking stats are enabled, then we update the step history. 
3751 #if _MARKING_STATS_
3752   if (_words_scanned >= _words_scanned_limit) {
3753     ++_clock_due_to_scanning;
3754   }
3755   if (_refs_reached >= _refs_reached_limit) {
3756     ++_clock_due_to_marking;
3757   }
3758
3759   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3760   _interval_start_time_ms = curr_time_ms;
3761   _all_clock_intervals_ms.add(last_interval_ms);
3762
3763   if (_cm->verbose_medium()) {
3764     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3765                            "scanned = %d%s, refs reached = %d%s",
3766                            _worker_id, last_interval_ms,
3767                            _words_scanned,
3768                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3769                            _refs_reached,
3770                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3771   }
3772 #endif // _MARKING_STATS_
3773
3774   // (4) We check whether we should yield. If we have to, then we abort.
3775   if (SuspendibleThreadSet::should_yield()) {
3776     // We should yield. To do this we abort the task. The caller is
3777     // responsible for yielding.
3778     set_has_aborted();
3779     statsOnly( ++_aborted_yield );
3780     return;
3781   }
3782
3783   // (5) We check whether we've reached our time quota. If we have,
3784   // then we abort.
3785   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3786   if (elapsed_time_ms > _time_target_ms) {
3787     set_has_aborted();
3788     _has_timed_out = true;
3789     statsOnly( ++_aborted_timed_out );
3790     return;
3791   }
3792
3793   // (6) Finally, we check whether there are enough completed SATB
3794   // buffers available for processing. If there are, we abort.
3795   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3796   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3797     if (_cm->verbose_low()) {
3798       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3799                              _worker_id);
3800     }
3801     // We do need to process SATB buffers, so we'll abort and
3802     // restart the marking task to do so.
3803     set_has_aborted();
3804     statsOnly( ++_aborted_satb );
3805     return;
3806   }
3807 }
3808
3809 void CMTask::recalculate_limits() {
3810   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3811   _words_scanned_limit      = _real_words_scanned_limit;
3812
3813   _real_refs_reached_limit  = _refs_reached + refs_reached_period;
3814   _refs_reached_limit       = _real_refs_reached_limit;
3815 }
3816
3817 void CMTask::decrease_limits() {
3818   // This is called when we believe that we're going to do an infrequent
3819   // operation which will increase the per-byte scanning cost (i.e. move
3820   // entries to/from the global stack). It basically tries to decrease the
3821   // scanning limit so that the clock is called earlier.
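  // Numerically (values illustrative only): if words_scanned_period were
  // 12K words and _real_words_scanned_limit were 100K, the adjustment
  // below drops the limit to 100K - 9K = 91K, leaving only a quarter of
  // the usual scanning budget before regular_clock_call() fires again.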
3822 3823 if (_cm->verbose_medium()) { 3824 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id); 3825 } 3826 3827 _words_scanned_limit = _real_words_scanned_limit - 3828 3 * words_scanned_period / 4; 3829 _refs_reached_limit = _real_refs_reached_limit - 3830 3 * refs_reached_period / 4; 3831 } 3832 3833 void CMTask::move_entries_to_global_stack() { 3834 // local array where we'll store the entries that will be popped 3835 // from the local queue 3836 oop buffer[global_stack_transfer_size]; 3837 3838 int n = 0; 3839 oop obj; 3840 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3841 buffer[n] = obj; 3842 ++n; 3843 } 3844 3845 if (n > 0) { 3846 // we popped at least one entry from the local queue 3847 3848 statsOnly( ++_global_transfers_to; _local_pops += n ); 3849 3850 if (!_cm->mark_stack_push(buffer, n)) { 3851 if (_cm->verbose_low()) { 3852 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow", 3853 _worker_id); 3854 } 3855 set_has_aborted(); 3856 } else { 3857 // the transfer was successful 3858 3859 if (_cm->verbose_medium()) { 3860 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack", 3861 _worker_id, n); 3862 } 3863 statsOnly( int tmp_size = _cm->mark_stack_size(); 3864 if (tmp_size > _global_max_size) { 3865 _global_max_size = tmp_size; 3866 } 3867 _global_pushes += n ); 3868 } 3869 } 3870 3871 // this operation was quite expensive, so decrease the limits 3872 decrease_limits(); 3873 } 3874 3875 void CMTask::get_entries_from_global_stack() { 3876 // local array where we'll store the entries that will be popped 3877 // from the global stack. 3878 oop buffer[global_stack_transfer_size]; 3879 int n; 3880 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3881 assert(n <= global_stack_transfer_size, 3882 "we should not pop more than the given limit"); 3883 if (n > 0) { 3884 // yes, we did actually pop at least one entry 3885 3886 statsOnly( ++_global_transfers_from; _global_pops += n ); 3887 if (_cm->verbose_medium()) { 3888 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack", 3889 _worker_id, n); 3890 } 3891 for (int i = 0; i < n; ++i) { 3892 bool success = _task_queue->push(buffer[i]); 3893 // We only call this when the local queue is empty or under a 3894 // given target limit. So, we do not expect this push to fail. 3895 assert(success, "invariant"); 3896 } 3897 3898 statsOnly( int tmp_size = _task_queue->size(); 3899 if (tmp_size > _local_max_size) { 3900 _local_max_size = tmp_size; 3901 } 3902 _local_pushes += n ); 3903 } 3904 3905 // this operation was quite expensive, so decrease the limits 3906 decrease_limits(); 3907 } 3908 3909 void CMTask::drain_local_queue(bool partially) { 3910 if (has_aborted()) return; 3911 3912 // Decide what the target size is, depending whether we're going to 3913 // drain it partially (so that other tasks can steal if they run out 3914 // of things to do) or totally (at the very end). 
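  // For a partial drain the target below is the smaller of a third of the
  // queue's capacity and GCDrainStackTargetSize, which keeps some entries
  // available for other tasks to steal; a full drain targets an empty queue.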
3915 size_t target_size; 3916 if (partially) { 3917 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3918 } else { 3919 target_size = 0; 3920 } 3921 3922 if (_task_queue->size() > target_size) { 3923 if (_cm->verbose_high()) { 3924 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT, 3925 _worker_id, target_size); 3926 } 3927 3928 oop obj; 3929 bool ret = _task_queue->pop_local(obj); 3930 while (ret) { 3931 statsOnly( ++_local_pops ); 3932 3933 if (_cm->verbose_high()) { 3934 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id, 3935 p2i((void*) obj)); 3936 } 3937 3938 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3939 assert(!_g1h->is_on_master_free_list( 3940 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3941 3942 scan_object(obj); 3943 3944 if (_task_queue->size() <= target_size || has_aborted()) { 3945 ret = false; 3946 } else { 3947 ret = _task_queue->pop_local(obj); 3948 } 3949 } 3950 3951 if (_cm->verbose_high()) { 3952 gclog_or_tty->print_cr("[%u] drained local queue, size = %u", 3953 _worker_id, _task_queue->size()); 3954 } 3955 } 3956 } 3957 3958 void CMTask::drain_global_stack(bool partially) { 3959 if (has_aborted()) return; 3960 3961 // We have a policy to drain the local queue before we attempt to 3962 // drain the global stack. 3963 assert(partially || _task_queue->size() == 0, "invariant"); 3964 3965 // Decide what the target size is, depending whether we're going to 3966 // drain it partially (so that other tasks can steal if they run out 3967 // of things to do) or totally (at the very end). Notice that, 3968 // because we move entries from the global stack in chunks or 3969 // because another task might be doing the same, we might in fact 3970 // drop below the target. But, this is not a problem. 3971 size_t target_size; 3972 if (partially) { 3973 target_size = _cm->partial_mark_stack_size_target(); 3974 } else { 3975 target_size = 0; 3976 } 3977 3978 if (_cm->mark_stack_size() > target_size) { 3979 if (_cm->verbose_low()) { 3980 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT, 3981 _worker_id, target_size); 3982 } 3983 3984 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3985 get_entries_from_global_stack(); 3986 drain_local_queue(partially); 3987 } 3988 3989 if (_cm->verbose_low()) { 3990 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT, 3991 _worker_id, _cm->mark_stack_size()); 3992 } 3993 } 3994 } 3995 3996 // SATB Queue has several assumptions on whether to call the par or 3997 // non-par versions of the methods. this is why some of the code is 3998 // replicated. We should really get rid of the single-threaded version 3999 // of the code to simplify things. 4000 void CMTask::drain_satb_buffers() { 4001 if (has_aborted()) return; 4002 4003 // We set this so that the regular clock knows that we're in the 4004 // middle of draining buffers and doesn't set the abort flag when it 4005 // notices that SATB buffers are available for draining. It'd be 4006 // very counter productive if it did that. 
:-) 4007 _draining_satb_buffers = true; 4008 4009 CMObjectClosure oc(this); 4010 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 4011 if (G1CollectedHeap::use_parallel_gc_threads()) { 4012 satb_mq_set.set_par_closure(_worker_id, &oc); 4013 } else { 4014 satb_mq_set.set_closure(&oc); 4015 } 4016 4017 // This keeps claiming and applying the closure to completed buffers 4018 // until we run out of buffers or we need to abort. 4019 if (G1CollectedHeap::use_parallel_gc_threads()) { 4020 while (!has_aborted() && 4021 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 4022 if (_cm->verbose_medium()) { 4023 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 4024 } 4025 statsOnly( ++_satb_buffers_processed ); 4026 regular_clock_call(); 4027 } 4028 } else { 4029 while (!has_aborted() && 4030 satb_mq_set.apply_closure_to_completed_buffer()) { 4031 if (_cm->verbose_medium()) { 4032 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 4033 } 4034 statsOnly( ++_satb_buffers_processed ); 4035 regular_clock_call(); 4036 } 4037 } 4038 4039 _draining_satb_buffers = false; 4040 4041 assert(has_aborted() || 4042 concurrent() || 4043 satb_mq_set.completed_buffers_num() == 0, "invariant"); 4044 4045 if (G1CollectedHeap::use_parallel_gc_threads()) { 4046 satb_mq_set.set_par_closure(_worker_id, NULL); 4047 } else { 4048 satb_mq_set.set_closure(NULL); 4049 } 4050 4051 // again, this was a potentially expensive operation, decrease the 4052 // limits to get the regular clock call early 4053 decrease_limits(); 4054 } 4055 4056 void CMTask::print_stats() { 4057 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 4058 _worker_id, _calls); 4059 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 4060 _elapsed_time_ms, _termination_time_ms); 4061 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4062 _step_times_ms.num(), _step_times_ms.avg(), 4063 _step_times_ms.sd()); 4064 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4065 _step_times_ms.maximum(), _step_times_ms.sum()); 4066 4067 #if _MARKING_STATS_ 4068 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4069 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 4070 _all_clock_intervals_ms.sd()); 4071 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4072 _all_clock_intervals_ms.maximum(), 4073 _all_clock_intervals_ms.sum()); 4074 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 4075 _clock_due_to_scanning, _clock_due_to_marking); 4076 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 4077 _objs_scanned, _objs_found_on_bitmap); 4078 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 4079 _local_pushes, _local_pops, _local_max_size); 4080 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 4081 _global_pushes, _global_pops, _global_max_size); 4082 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 4083 _global_transfers_to,_global_transfers_from); 4084 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 4085 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 4086 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 4087 _steal_attempts, _steals); 4088 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 4089 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 
4090 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 4091 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 4092 _aborted_timed_out, _aborted_satb, _aborted_termination); 4093 #endif // _MARKING_STATS_ 4094 } 4095 4096 /***************************************************************************** 4097 4098 The do_marking_step(time_target_ms, ...) method is the building 4099 block of the parallel marking framework. It can be called in parallel 4100 with other invocations of do_marking_step() on different tasks 4101 (but only one per task, obviously) and concurrently with the 4102 mutator threads, or during remark, hence it eliminates the need 4103 for two versions of the code. When called during remark, it will 4104 pick up from where the task left off during the concurrent marking 4105 phase. Interestingly, tasks are also claimable during evacuation 4106 pauses too, since do_marking_step() ensures that it aborts before 4107 it needs to yield. 4108 4109 The data structures that it uses to do marking work are the 4110 following: 4111 4112 (1) Marking Bitmap. If there are gray objects that appear only 4113 on the bitmap (this happens either when dealing with an overflow 4114 or when the initial marking phase has simply marked the roots 4115 and didn't push them on the stack), then tasks claim heap 4116 regions whose bitmap they then scan to find gray objects. A 4117 global finger indicates where the end of the last claimed region 4118 is. A local finger indicates how far into the region a task has 4119 scanned. The two fingers are used to determine how to gray an 4120 object (i.e. whether simply marking it is OK, as it will be 4121 visited by a task in the future, or whether it needs to be also 4122 pushed on a stack). 4123 4124 (2) Local Queue. The local queue of the task which is accessed 4125 reasonably efficiently by the task. Other tasks can steal from 4126 it when they run out of work. Throughout the marking phase, a 4127 task attempts to keep its local queue short but not totally 4128 empty, so that entries are available for stealing by other 4129 tasks. Only when there is no more work, a task will totally 4130 drain its local queue. 4131 4132 (3) Global Mark Stack. This handles local queue overflow. During 4133 marking only sets of entries are moved between it and the local 4134 queues, as access to it requires a mutex and more fine-grain 4135 interaction with it which might cause contention. If it 4136 overflows, then the marking phase should restart and iterate 4137 over the bitmap to identify gray objects. Throughout the marking 4138 phase, tasks attempt to keep the global mark stack at a small 4139 length but not totally empty, so that entries are available for 4140 popping by other tasks. Only when there is no more work, tasks 4141 will totally drain the global mark stack. 4142 4143 (4) SATB Buffer Queue. This is where completed SATB buffers are 4144 made available. Buffers are regularly removed from this queue 4145 and scanned for roots, so that the queue doesn't get too 4146 long. During remark, all completed buffers are processed, as 4147 well as the filled in parts of any uncompleted buffers. 4148 4149 The do_marking_step() method tries to abort when the time target 4150 has been reached. There are a few other cases when the 4151 do_marking_step() method also aborts: 4152 4153 (1) When the marking phase has been aborted (after a Full GC). 4154 4155 (2) When a global overflow (on the global stack) has been 4156 triggered. 
Before the task aborts, it will actually sync up with
4157    the other tasks to ensure that all the marking data structures
4158    (local queues, stacks, fingers etc.) are re-initialized so that
4159    when do_marking_step() completes, the marking phase can
4160    immediately restart.
4161
4162    (3) When enough completed SATB buffers are available. The
4163    do_marking_step() method only tries to drain SATB buffers right
4164    at the beginning. So, if enough buffers are available, the
4165    marking step aborts and the SATB buffers are processed at
4166    the beginning of the next invocation.
4167
4168    (4) To yield. When we have to yield, we abort and do the yield
4169    right at the end of do_marking_step(). This saves us a lot of
4170    hassle: by yielding we might allow a Full GC, in which case
4171    objects would be compacted underneath our feet, the heap might
4172    shrink, etc. We avoid checking for all of this mid-step by
4173    aborting and doing the yield right at the end.
4174
4175    From the above it follows that the do_marking_step() method should
4176    be called in a loop (or, otherwise, regularly) until it completes.
4177
4178    If a marking step completes without its has_aborted() flag being
4179    true, it means it has completed the current marking phase (and
4180    also all other marking tasks have done so and have all synced up).
4181
4182    A method called regular_clock_call() is invoked "regularly" (at
4183    sub-ms intervals) throughout marking. It is this clock method that
4184    checks all the abort conditions mentioned above and decides when
4185    the task should abort. A work-based scheme is used to trigger the
4186    clock method: it is called when the number of object words the
4187    marking phase has scanned or the number of references the marking
4188    phase has visited reaches a given limit. Additional invocations of
4189    the clock method have been planted in a few other strategic places
4190    too. The initial reason for the clock method was to avoid sampling
4191    vtime too frequently, as it is quite expensive. Once it was in
4192    place, it was natural to piggy-back all the other conditions on it
4193    too, rather than check them constantly throughout the code.
4194
4195    If do_termination is true then do_marking_step will enter its
4196    termination protocol.
4197
4198    The value of is_serial must be true when do_marking_step is being
4199    called serially (i.e. by the VMThread) and do_marking_step should
4200    skip any synchronization in the termination and overflow code.
4201    Examples include the serial remark code and the serial reference
4202    processing closures.
4203
4204    The value of is_serial must be false when do_marking_step is
4205    being called by any of the worker threads in a work gang.
4206    Examples include the concurrent marking code (CMMarkingTask),
4207    the MT remark code, and the MT reference processing closures.
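   As a sketch (illustrative only, not a verbatim caller), the calling
   pattern the above implies is:

     do {
       task->do_marking_step(target_ms,
                             true  /* do_termination */,
                             false /* is_serial */);
       // on abort: yield, process SATB buffers, or sync up with the
       // other tasks as required before trying again
     } while (task->has_aborted() && !cm->has_aborted());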
4208
4209  *****************************************************************************/
4210
4211 void CMTask::do_marking_step(double time_target_ms,
4212                              bool do_termination,
4213                              bool is_serial) {
4214   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4215   assert(concurrent() == _cm->concurrent(), "they should be the same");
4216
4217   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4218   assert(_task_queues != NULL, "invariant");
4219   assert(_task_queue != NULL, "invariant");
4220   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4221
4222   assert(!_claimed,
4223          "only one thread should claim this task at any one time");
4224
4225   // OK, this doesn't safeguard against all possible scenarios, as it is
4226   // possible for two threads to set the _claimed flag at the same
4227   // time. But it is only for debugging purposes anyway and it will
4228   // catch most problems.
4229   _claimed = true;
4230
4231   _start_time_ms = os::elapsedVTime() * 1000.0;
4232   statsOnly( _interval_start_time_ms = _start_time_ms );
4233
4234   // If do_stealing is true then do_marking_step will attempt to
4235   // steal work from the other CMTasks. It only makes sense to
4236   // enable stealing when the termination protocol is enabled
4237   // and do_marking_step() is not being called serially.
4238   bool do_stealing = do_termination && !is_serial;
4239
4240   double diff_prediction_ms =
4241     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4242   _time_target_ms = time_target_ms - diff_prediction_ms;
4243
4244   // Set up the variables that are used in the work-based scheme to
4245   // call the regular clock method.
4246   _words_scanned = 0;
4247   _refs_reached  = 0;
4248   recalculate_limits();
4249
4250   // Clear all flags.
4251   clear_has_aborted();
4252   _has_timed_out = false;
4253   _draining_satb_buffers = false;
4254
4255   ++_calls;
4256
4257   if (_cm->verbose_low()) {
4258     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4259                            "target = %1.2lfms >>>>>>>>>>",
4260                            _worker_id, _calls, _time_target_ms);
4261   }
4262
4263   // Set up the bitmap and oop closures. Anything that uses them is
4264   // eventually called from this method, so it is OK to allocate these
4265   // statically.
4266   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4267   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4268   set_cm_oop_closure(&cm_oop_closure);
4269
4270   if (_cm->has_overflown()) {
4271     // This can happen if the mark stack overflows during a GC pause
4272     // and this task, after a yield point, restarts. We have to abort
4273     // as we need to get into the overflow protocol which happens
4274     // right at the end of this task.
4275     set_has_aborted();
4276   }
4277
4278   // First drain any available SATB buffers. After this, we will not
4279   // look at SATB buffers before the next invocation of this method.
4280   // If enough completed SATB buffers are queued up, the regular clock
4281   // will abort this task so that it restarts.
4282   drain_satb_buffers();
4283   // ...then partially drain the local queue and the global stack.
4284   drain_local_queue(true);
4285   drain_global_stack(true);
4286
4287   do {
4288     if (!has_aborted() && _curr_region != NULL) {
4289       // This means that we're already holding on to a region.
4290       assert(_finger != NULL, "if region is not NULL, then the finger "
4291              "should not be NULL either");
4292
4293       // We might have restarted this task after an evacuation pause
4294       // which might have evacuated the region we're holding on to
4295       // underneath our feet.
Let's read its limit again to make sure 4296 // that we do not iterate over a region of the heap that 4297 // contains garbage (update_region_limit() will also move 4298 // _finger to the start of the region if it is found empty). 4299 update_region_limit(); 4300 // We will start from _finger not from the start of the region, 4301 // as we might be restarting this task after aborting half-way 4302 // through scanning this region. In this case, _finger points to 4303 // the address where we last found a marked object. If this is a 4304 // fresh region, _finger points to start(). 4305 MemRegion mr = MemRegion(_finger, _region_limit); 4306 4307 if (_cm->verbose_low()) { 4308 gclog_or_tty->print_cr("[%u] we're scanning part " 4309 "["PTR_FORMAT", "PTR_FORMAT") " 4310 "of region "HR_FORMAT, 4311 _worker_id, p2i(_finger), p2i(_region_limit), 4312 HR_FORMAT_PARAMS(_curr_region)); 4313 } 4314 4315 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4316 "humongous regions should go around loop once only"); 4317 4318 // Some special cases: 4319 // If the memory region is empty, we can just give up the region. 4320 // If the current region is humongous then we only need to check 4321 // the bitmap for the bit associated with the start of the object, 4322 // scan the object if it's live, and give up the region. 4323 // Otherwise, let's iterate over the bitmap of the part of the region 4324 // that is left. 4325 // If the iteration is successful, give up the region. 4326 if (mr.is_empty()) { 4327 giveup_current_region(); 4328 regular_clock_call(); 4329 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4330 if (_nextMarkBitMap->isMarked(mr.start())) { 4331 // The object is marked - apply the closure 4332 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4333 bitmap_closure.do_bit(offset); 4334 } 4335 // Even if this task aborted while scanning the humongous object 4336 // we can (and should) give up the current region. 4337 giveup_current_region(); 4338 regular_clock_call(); 4339 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4340 giveup_current_region(); 4341 regular_clock_call(); 4342 } else { 4343 assert(has_aborted(), "currently the only way to do so"); 4344 // The only way to abort the bitmap iteration is to return 4345 // false from the do_bit() method. However, inside the 4346 // do_bit() method we move the _finger to point to the 4347 // object currently being looked at. So, if we bail out, we 4348 // have definitely set _finger to something non-null. 4349 assert(_finger != NULL, "invariant"); 4350 4351 // Region iteration was actually aborted. So now _finger 4352 // points to the address of the object we last scanned. If we 4353 // leave it there, when we restart this task, we will rescan 4354 // the object. It is easy to avoid this. We move the finger by 4355 // enough to point to the next possible object header (the 4356 // bitmap knows by how much we need to move it as it knows its 4357 // granularity). 4358 assert(_finger < _region_limit, "invariant"); 4359 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4360 // Check if bitmap iteration was aborted while scanning the last object 4361 if (new_finger >= _region_limit) { 4362 giveup_current_region(); 4363 } else { 4364 move_finger_to(new_finger); 4365 } 4366 } 4367 } 4368 // At this point we have either completed iterating over the 4369 // region we were holding on to, or we have aborted. 
4370 4371 // We then partially drain the local queue and the global stack. 4372 // (Do we really need this?) 4373 drain_local_queue(true); 4374 drain_global_stack(true); 4375 4376 // Read the note on the claim_region() method on why it might 4377 // return NULL with potentially more regions available for 4378 // claiming and why we have to check out_of_regions() to determine 4379 // whether we're done or not. 4380 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4381 // We are going to try to claim a new region. We should have 4382 // given up on the previous one. 4383 // Separated the asserts so that we know which one fires. 4384 assert(_curr_region == NULL, "invariant"); 4385 assert(_finger == NULL, "invariant"); 4386 assert(_region_limit == NULL, "invariant"); 4387 if (_cm->verbose_low()) { 4388 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4389 } 4390 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4391 if (claimed_region != NULL) { 4392 // Yes, we managed to claim one 4393 statsOnly( ++_regions_claimed ); 4394 4395 if (_cm->verbose_low()) { 4396 gclog_or_tty->print_cr("[%u] we successfully claimed " 4397 "region "PTR_FORMAT, 4398 _worker_id, p2i(claimed_region)); 4399 } 4400 4401 setup_for_region(claimed_region); 4402 assert(_curr_region == claimed_region, "invariant"); 4403 } 4404 // It is important to call the regular clock here. It might take 4405 // a while to claim a region if, for example, we hit a large 4406 // block of empty regions. So we need to call the regular clock 4407 // method once round the loop to make sure it's called 4408 // frequently enough. 4409 regular_clock_call(); 4410 } 4411 4412 if (!has_aborted() && _curr_region == NULL) { 4413 assert(_cm->out_of_regions(), 4414 "at this point we should be out of regions"); 4415 } 4416 } while ( _curr_region != NULL && !has_aborted()); 4417 4418 if (!has_aborted()) { 4419 // We cannot check whether the global stack is empty, since other 4420 // tasks might be pushing objects to it concurrently. 4421 assert(_cm->out_of_regions(), 4422 "at this point we should be out of regions"); 4423 4424 if (_cm->verbose_low()) { 4425 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4426 } 4427 4428 // Try to reduce the number of available SATB buffers so that 4429 // remark has less work to do. 4430 drain_satb_buffers(); 4431 } 4432 4433 // Since we've done everything else, we can now totally drain the 4434 // local queue and global stack. 4435 drain_local_queue(false); 4436 drain_global_stack(false); 4437 4438 // Attempt at work stealing from other task's queues. 4439 if (do_stealing && !has_aborted()) { 4440 // We have not aborted. This means that we have finished all that 4441 // we could. Let's try to do some stealing... 4442 4443 // We cannot check whether the global stack is empty, since other 4444 // tasks might be pushing objects to it concurrently. 
4445 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4446 "only way to reach here"); 4447 4448 if (_cm->verbose_low()) { 4449 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4450 } 4451 4452 while (!has_aborted()) { 4453 oop obj; 4454 statsOnly( ++_steal_attempts ); 4455 4456 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4457 if (_cm->verbose_medium()) { 4458 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4459 _worker_id, p2i((void*) obj)); 4460 } 4461 4462 statsOnly( ++_steals ); 4463 4464 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4465 "any stolen object should be marked"); 4466 scan_object(obj); 4467 4468 // And since we're towards the end, let's totally drain the 4469 // local queue and global stack. 4470 drain_local_queue(false); 4471 drain_global_stack(false); 4472 } else { 4473 break; 4474 } 4475 } 4476 } 4477 4478 // If we are about to wrap up and go into termination, check if we 4479 // should raise the overflow flag. 4480 if (do_termination && !has_aborted()) { 4481 if (_cm->force_overflow()->should_force()) { 4482 _cm->set_has_overflown(); 4483 regular_clock_call(); 4484 } 4485 } 4486 4487 // We still haven't aborted. Now, let's try to get into the 4488 // termination protocol. 4489 if (do_termination && !has_aborted()) { 4490 // We cannot check whether the global stack is empty, since other 4491 // tasks might be concurrently pushing objects on it. 4492 // Separated the asserts so that we know which one fires. 4493 assert(_cm->out_of_regions(), "only way to reach here"); 4494 assert(_task_queue->size() == 0, "only way to reach here"); 4495 4496 if (_cm->verbose_low()) { 4497 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4498 } 4499 4500 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4501 4502 // The CMTask class also extends the TerminatorTerminator class, 4503 // hence its should_exit_termination() method will also decide 4504 // whether to exit the termination protocol or not. 4505 bool finished = (is_serial || 4506 _cm->terminator()->offer_termination(this)); 4507 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4508 _termination_time_ms += 4509 termination_end_time_ms - _termination_start_time_ms; 4510 4511 if (finished) { 4512 // We're all done. 4513 4514 if (_worker_id == 0) { 4515 // let's allow task 0 to do this 4516 if (concurrent()) { 4517 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4518 // we need to set this to false before the next 4519 // safepoint. This way we ensure that the marking phase 4520 // doesn't observe any more heap expansions. 4521 _cm->clear_concurrent_marking_in_progress(); 4522 } 4523 } 4524 4525 // We can now guarantee that the global stack is empty, since 4526 // all other tasks have finished. We separated the guarantees so 4527 // that, if a condition is false, we can immediately find out 4528 // which one. 4529 guarantee(_cm->out_of_regions(), "only way to reach here"); 4530 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4531 guarantee(_task_queue->size() == 0, "only way to reach here"); 4532 guarantee(!_cm->has_overflown(), "only way to reach here"); 4533 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4534 4535 if (_cm->verbose_low()) { 4536 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4537 } 4538 } else { 4539 // Apparently there's more work to do. Let's abort this task. It 4540 // will restart it and we can hopefully find more things to do. 
4541 4542 if (_cm->verbose_low()) { 4543 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4544 _worker_id); 4545 } 4546 4547 set_has_aborted(); 4548 statsOnly( ++_aborted_termination ); 4549 } 4550 } 4551 4552 // Mainly for debugging purposes to make sure that a pointer to the 4553 // closure which was statically allocated in this frame doesn't 4554 // escape it by accident. 4555 set_cm_oop_closure(NULL); 4556 double end_time_ms = os::elapsedVTime() * 1000.0; 4557 double elapsed_time_ms = end_time_ms - _start_time_ms; 4558 // Update the step history. 4559 _step_times_ms.add(elapsed_time_ms); 4560 4561 if (has_aborted()) { 4562 // The task was aborted for some reason. 4563 4564 statsOnly( ++_aborted ); 4565 4566 if (_has_timed_out) { 4567 double diff_ms = elapsed_time_ms - _time_target_ms; 4568 // Keep statistics of how well we did with respect to hitting 4569 // our target only if we actually timed out (if we aborted for 4570 // other reasons, then the results might get skewed). 4571 _marking_step_diffs_ms.add(diff_ms); 4572 } 4573 4574 if (_cm->has_overflown()) { 4575 // This is the interesting one. We aborted because a global 4576 // overflow was raised. This means we have to restart the 4577 // marking phase and start iterating over regions. However, in 4578 // order to do this we have to make sure that all tasks stop 4579 // what they are doing and re-initialize in a safe manner. We 4580 // will achieve this with the use of two barrier sync points. 4581 4582 if (_cm->verbose_low()) { 4583 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4584 } 4585 4586 if (!is_serial) { 4587 // We only need to enter the sync barrier if being called 4588 // from a parallel context 4589 _cm->enter_first_sync_barrier(_worker_id); 4590 4591 // When we exit this sync barrier we know that all tasks have 4592 // stopped doing marking work. So, it's now safe to 4593 // re-initialize our data structures. At the end of this method, 4594 // task 0 will clear the global data structures. 4595 } 4596 4597 statsOnly( ++_aborted_overflow ); 4598 4599 // We clear the local state of this task... 4600 clear_region_fields(); 4601 4602 if (!is_serial) { 4603 // ...and enter the second barrier. 4604 _cm->enter_second_sync_barrier(_worker_id); 4605 } 4606 // At this point, if we're during the concurrent phase of 4607 // marking, everything has been re-initialized and we're 4608 // ready to restart. 
4609 } 4610 4611 if (_cm->verbose_low()) { 4612 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4613 "elapsed = %1.2lfms <<<<<<<<<<", 4614 _worker_id, _time_target_ms, elapsed_time_ms); 4615 if (_cm->has_aborted()) { 4616 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4617 _worker_id); 4618 } 4619 } 4620 } else { 4621 if (_cm->verbose_low()) { 4622 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4623 "elapsed = %1.2lfms <<<<<<<<<<", 4624 _worker_id, _time_target_ms, elapsed_time_ms); 4625 } 4626 } 4627 4628 _claimed = false; 4629 } 4630 4631 CMTask::CMTask(uint worker_id, 4632 ConcurrentMark* cm, 4633 size_t* marked_bytes, 4634 BitMap* card_bm, 4635 CMTaskQueue* task_queue, 4636 CMTaskQueueSet* task_queues) 4637 : _g1h(G1CollectedHeap::heap()), 4638 _worker_id(worker_id), _cm(cm), 4639 _claimed(false), 4640 _nextMarkBitMap(NULL), _hash_seed(17), 4641 _task_queue(task_queue), 4642 _task_queues(task_queues), 4643 _cm_oop_closure(NULL), 4644 _marked_bytes_array(marked_bytes), 4645 _card_bm(card_bm) { 4646 guarantee(task_queue != NULL, "invariant"); 4647 guarantee(task_queues != NULL, "invariant"); 4648 4649 statsOnly( _clock_due_to_scanning = 0; 4650 _clock_due_to_marking = 0 ); 4651 4652 _marking_step_diffs_ms.add(0.5); 4653 } 4654 4655 // These are formatting macros that are used below to ensure 4656 // consistent formatting. The *_H_* versions are used to format the 4657 // header for a particular value and they should be kept consistent 4658 // with the corresponding macro. Also note that most of the macros add 4659 // the necessary white space (as a prefix) which makes them a bit 4660 // easier to compose. 4661 4662 // All the output lines are prefixed with this string to be able to 4663 // identify them easily in a large log file. 4664 #define G1PPRL_LINE_PREFIX "###" 4665 4666 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4667 #ifdef _LP64 4668 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4669 #else // _LP64 4670 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4671 #endif // _LP64 4672 4673 // For per-region info 4674 #define G1PPRL_TYPE_FORMAT " %-4s" 4675 #define G1PPRL_TYPE_H_FORMAT " %4s" 4676 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4677 #define G1PPRL_BYTE_H_FORMAT " %9s" 4678 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4679 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4680 4681 // For summary info 4682 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4683 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4684 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4685 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4686 4687 G1PrintRegionLivenessInfoClosure:: 4688 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4689 : _out(out), 4690 _total_used_bytes(0), _total_capacity_bytes(0), 4691 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4692 _hum_used_bytes(0), _hum_capacity_bytes(0), 4693 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4694 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4695 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4696 MemRegion g1_reserved = g1h->g1_reserved(); 4697 double now = os::elapsedTime(); 4698 4699 // Print the header of the output. 
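// Roughly, the table printed below has the form (widths elided):
//   ### PHASE <name> @ <time>
//   ### HEAP reserved: <range> region-size: <bytes>
//   ### type address-range used prev-live next-live gc-eff remset code-roots
//   ### <one "###"-prefixed line per region>
// followed by a SUMMARY footer emitted by the destructor.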
4700   _out->cr();
4701   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4702   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4703                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4704                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4705                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4706                  HeapRegion::GrainBytes);
4707   _out->print_cr(G1PPRL_LINE_PREFIX);
4708   _out->print_cr(G1PPRL_LINE_PREFIX
4709                  G1PPRL_TYPE_H_FORMAT
4710                  G1PPRL_ADDR_BASE_H_FORMAT
4711                  G1PPRL_BYTE_H_FORMAT
4712                  G1PPRL_BYTE_H_FORMAT
4713                  G1PPRL_BYTE_H_FORMAT
4714                  G1PPRL_DOUBLE_H_FORMAT
4715                  G1PPRL_BYTE_H_FORMAT
4716                  G1PPRL_BYTE_H_FORMAT,
4717                  "type", "address-range",
4718                  "used", "prev-live", "next-live", "gc-eff",
4719                  "remset", "code-roots");
4720   _out->print_cr(G1PPRL_LINE_PREFIX
4721                  G1PPRL_TYPE_H_FORMAT
4722                  G1PPRL_ADDR_BASE_H_FORMAT
4723                  G1PPRL_BYTE_H_FORMAT
4724                  G1PPRL_BYTE_H_FORMAT
4725                  G1PPRL_BYTE_H_FORMAT
4726                  G1PPRL_DOUBLE_H_FORMAT
4727                  G1PPRL_BYTE_H_FORMAT
4728                  G1PPRL_BYTE_H_FORMAT,
4729                  "", "",
4730                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4731                  "(bytes)", "(bytes)");
4732 }
4733
4734 // Takes a reference to one of the _hum_* fields, deduces the
4735 // corresponding value for a region in a humongous region
4736 // series (either the region size, or what's left if the _hum_* field
4737 // is < the region size), and updates the _hum_* field accordingly.
4738 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4739   size_t bytes = 0;
4740   // The > 0 check is to deal with the prev and next live bytes which
4741   // could be 0.
4742   if (*hum_bytes > 0) {
4743     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4744     *hum_bytes -= bytes;
4745   }
4746   return bytes;
4747 }
4748
4749 // It deduces the values for a region in a humongous region series
4750 // from the _hum_* fields and updates those accordingly. It assumes
4751 // that the _hum_* fields have already been set up from the "starts
4752 // humongous" region and that we visit the regions in address order.
4753 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4754                                                      size_t* capacity_bytes,
4755                                                      size_t* prev_live_bytes,
4756                                                      size_t* next_live_bytes) {
4757   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4758   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4759   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4760   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4761   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4762 }
4763
4764 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4765   const char* type = "";
4766   HeapWord* bottom = r->bottom();
4767   HeapWord* end = r->end();
4768   size_t capacity_bytes = r->capacity();
4769   size_t used_bytes = r->used();
4770   size_t prev_live_bytes = r->live_bytes();
4771   size_t next_live_bytes = r->next_live_bytes();
4772   double gc_eff = r->gc_efficiency();
4773   size_t remset_bytes = r->rem_set()->mem_size();
4774   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4775
4776   if (r->used() == 0) {
4777     type = "FREE";
4778   } else if (r->is_survivor()) {
4779     type = "SURV";
4780   } else if (r->is_young()) {
4781     type = "EDEN";
4782   } else if (r->startsHumongous()) {
4783     type = "HUMS";
4784
4785     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4786            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4787            "they should have been zeroed after the last time we used them");
4788     // Set up the _hum_* fields.
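    // Worked example (sizes illustrative): with a 1 MB region size, a
    // humongous series covering 2.5 MB of used bytes has its series-wide
    // totals stashed in the _hum_* fields here; the "starts humongous"
    // row and the two "continues humongous" rows that follow then each
    // claim MIN2(GrainBytes, remainder) from the stash (1 MB, 1 MB and
    // 0.5 MB), so the printed rows sum back to the series total.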
4789 _hum_capacity_bytes = capacity_bytes; 4790 _hum_used_bytes = used_bytes; 4791 _hum_prev_live_bytes = prev_live_bytes; 4792 _hum_next_live_bytes = next_live_bytes; 4793 get_hum_bytes(&used_bytes, &capacity_bytes, 4794 &prev_live_bytes, &next_live_bytes); 4795 end = bottom + HeapRegion::GrainWords; 4796 } else if (r->continuesHumongous()) { 4797 type = "HUMC"; 4798 get_hum_bytes(&used_bytes, &capacity_bytes, 4799 &prev_live_bytes, &next_live_bytes); 4800 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4801 } else { 4802 type = "OLD"; 4803 } 4804 4805 _total_used_bytes += used_bytes; 4806 _total_capacity_bytes += capacity_bytes; 4807 _total_prev_live_bytes += prev_live_bytes; 4808 _total_next_live_bytes += next_live_bytes; 4809 _total_remset_bytes += remset_bytes; 4810 _total_strong_code_roots_bytes += strong_code_roots_bytes; 4811 4812 // Print a line for this particular region. 4813 _out->print_cr(G1PPRL_LINE_PREFIX 4814 G1PPRL_TYPE_FORMAT 4815 G1PPRL_ADDR_BASE_FORMAT 4816 G1PPRL_BYTE_FORMAT 4817 G1PPRL_BYTE_FORMAT 4818 G1PPRL_BYTE_FORMAT 4819 G1PPRL_DOUBLE_FORMAT 4820 G1PPRL_BYTE_FORMAT 4821 G1PPRL_BYTE_FORMAT, 4822 type, p2i(bottom), p2i(end), 4823 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 4824 remset_bytes, strong_code_roots_bytes); 4825 4826 return false; 4827 } 4828 4829 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4830 // add static memory usages to remembered set sizes 4831 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 4832 // Print the footer of the output. 4833 _out->print_cr(G1PPRL_LINE_PREFIX); 4834 _out->print_cr(G1PPRL_LINE_PREFIX 4835 " SUMMARY" 4836 G1PPRL_SUM_MB_FORMAT("capacity") 4837 G1PPRL_SUM_MB_PERC_FORMAT("used") 4838 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4839 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4840 G1PPRL_SUM_MB_FORMAT("remset") 4841 G1PPRL_SUM_MB_FORMAT("code-roots"), 4842 bytes_to_mb(_total_capacity_bytes), 4843 bytes_to_mb(_total_used_bytes), 4844 perc(_total_used_bytes, _total_capacity_bytes), 4845 bytes_to_mb(_total_prev_live_bytes), 4846 perc(_total_prev_live_bytes, _total_capacity_bytes), 4847 bytes_to_mb(_total_next_live_bytes), 4848 perc(_total_next_live_bytes, _total_capacity_bytes), 4849 bytes_to_mb(_total_remset_bytes), 4850 bytes_to_mb(_total_strong_code_roots_bytes)); 4851 _out->cr(); 4852 }