/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionManager.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSet.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/allocation.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}

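// Each bit in the map covers (1 << _shifter) heap words: addresses are
// converted to bit offsets by heapWordToOffset() and converted back by
// offsetToHeapWord().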
HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return heap_size / mark_distance();
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions) {
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;  // The closure may yield during iteration. If yielded, abort the iteration.
public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
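    // Clear the bitmap for this region in 1 MB chunks rather than all at
    // once, so that a yielding caller gets a chance to yield between
    // chunks.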
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

void CMBitMap::clearAll() {
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  G1CollectedHeap::heap()->heap_region_iterate(&cl);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

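// Lock-free push: threads race to advance _index with a CAS; the winner
// then stores its element into the slot it claimed.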
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

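// Unlike the CAS-based variants above, par_push_arr() and par_pop_arr()
// serialize on ParGCRareEvent_lock.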
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

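  // Double-checked claim: read _next_survivor optimistically, then
  // re-read it under RootRegionScan_lock before advancing it.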
  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
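
// For example (illustrative values): scale_parallel_threads() maps 8
// parallel GC threads to (8 + 2) / 4 = 2 marking threads and 13 to 3;
// the MAX2 keeps the result at least 1.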

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads =     0;
    _max_parallel_marking_threads = 0;
    _sleep_factor             =   0.0;
    _marking_task_overhead    =   1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                              (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor          = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
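
      // Illustrative example (hypothetical values, not defaults taken from
      // anywhere): with MaxGCPauseMillis = 200, GCPauseIntervalMillis =
      // 1000, G1MarkingOverheadPercent = 10 and 8 processors,
      // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02 and cpu_ratio =
      // 0.125, so marking_thread_num = ceil(0.16) = 1,
      // marking_task_overhead = 0.02 * 8 = 0.16 and sleep_factor =
      // (1 - 0.16) / 0.16 = 5.25, i.e. the single marking thread sleeps
      // 5.25x as long as it runs.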
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                CardTableModRefBS::card_shift);
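  // That is, a card bitmap index can be computed as
  // (addr >> card_shift) - _heap_bottom_card_num, so the bottom of the
  // heap maps to index 0.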

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  g1h->heap_region_iterate(&cl);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}
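
// Checks whether a region has any mark bits set in the given bitmap.
// doHeapRegion() returns true (aborting the iteration) as soon as a
// marked address is found, so the iteration completes only if every
// region is clear.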
class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    return _bitmap->getNextMarkedWordAddress(r->bottom(), r->end()) != r->end();
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        _cm->clear_has_overflown();

        _cm->do_yield_check(worker_id);
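
        // If the task (but not the whole marking) aborted its step,
        // throttle: sleep for sleep_factor times the vtime the step just
        // consumed, keeping marking near its target overhead.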
        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrm_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrm_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.
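//
// For orientation (exposition only, not build code): the card liveness
// bitmap has one bit per card, so an index is, conceptually, the card-sized
// distance of an address from the bottom of the heap:
//
//   index(addr) ~ byte_offset(addr, heap_bottom) / card_size_in_bytes
//
// heap_bottom and card_size_in_bytes are stand-in names here; see
// ConcurrentMark::card_bitmap_index_for() for the real definition.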

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap - top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     end_idx, _card_bm->size()));
      assert(start_idx < _card_bm->size(),
             err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     start_idx, _card_bm->size()));

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _max_live_bytes;
  uint _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HeapRegionSetCount _old_regions_removed;
  HeapRegionSetCount _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(),
    _humongous_regions_removed(),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
  const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->continuesHumongous()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
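    // (Claim values are how a chunked parallel iteration marks a region as
    // visited: each phase tags regions with its own value - for example
    // InitialClaimValue, FinalCountClaimValue or NoteEndClaimValue - so a
    // region is processed at most once per phase, and cleanup() can assert
    // afterwards that every region carries the phase's value.)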
    _g1->reset_gc_time_stamps(hr);
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->isHumongous()) {
        assert(hr->startsHumongous(), "we should only see starts humongous");
        _humongous_regions_removed.increment(1u, hr->capacity());
        _g1->free_humongous_region(hr, _local_cleanup_list, true);
      } else {
        _old_regions_removed.increment(1u, hr->capacity());
        _g1->free_region(hr, _local_cleanup_list, true);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) {
      _max_region_time = region_time;
    }
    return false;
  }

  size_t max_live_bytes() { return _max_live_bytes; }
  uint regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(uint worker_id) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
                                            _g1h->workers()->active_workers(),
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we cannot guarantee that we only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm) { }

  void work(uint worker_id) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Cleanup Start");

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  uint n_workers;

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    g1h->set_par_threads();
    n_workers = g1h->n_par_threads();
    assert(g1h->n_par_threads() == n_workers,
           "Should not have been reset");
    g1h->workers()->run_task(&g1_par_count_task);
    // Done with the parallel phase so reset to 0.
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    n_workers = 1;
    g1_par_count_task.work(0);
  }

  if (VerifyDuringGC) {
    // Verify that the counting data accumulated during marking matches
    // that calculated by walking the marking bitmap.
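    // (The expected bitmaps below are recomputed from scratch from the next
    // mark bitmap by CalcLiveObjectsClosure; any bit set there but clear in
    // the concurrently accumulated bitmaps is reported as a failure by
    // G1ParVerifyFinalCountTask.)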

    // Bitmaps to hold expected values
    BitMap expected_region_bm(_region_bm.size(), true);
    BitMap expected_card_bm(_card_bm.size(), true);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
                                                 &_card_bm,
                                                 &expected_region_bm,
                                                 &expected_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_verify_task);
      // Done with the parallel phase so reset to 0.
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
             "sanity check");
    } else {
      g1_par_verify_task.work(0);
    }

    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  }

  size_t start_used_bytes = g1h->used();
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitmap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Scrub the remembered sets (if enabled) before the
  // record_concurrent_mark_cleanup_end() call below, since scrubbing
  // affects the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                                 HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(after)");
  }

  g1h->check_bitmaps("Cleanup End");

  g1h->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();

  g1h->trace_heap_after_concurrent_cycle();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a worker thread (for serial reference
// processing the CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->worker_id(), p2i(p), p2i((void*) obj));
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call CMTask::do_marking_step() to
        // process these entries.
        //
        // We call CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false      /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a given worker thread (for serial
// reference processing the CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.
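//
// The resulting drain pattern, shown here for exposition only (it mirrors
// the loop in do_void() below rather than adding new behavior):
//
//   do {
//     task->do_marking_step(1000000000.0 /* "infinite" time target */,
//                           true         /* do_termination */,
//                           is_serial);
//   } while (task->has_aborted() && !cm->has_overflown());
//
// i.e. keep re-invoking the step until it completes without aborting, or
// until the global mark stack has overflowed.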

class G1CMDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
                               _task->worker_id(), BOOL_TO_STR(_is_serial));
      }

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    ResourceMark rm;
    HandleMark hm;
    CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
}

// Helper class to get rid of some boilerplate code.
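// Specifically, it forwards the G1-specific concurrent-mark timer and GC id
// to GCTraceTime, and prepends a space when the message is actually printed
// so that remark sub-phase logging lines up with the surrounding output.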
class G1RemarkGCTraceTime : public GCTraceTime {
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

public:
  G1RemarkGCTraceTime(const char* title, bool doit)
    : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
                  G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
  }
};

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  if (has_overflown()) {
    // Skip processing the discovered references if we have
    // overflown the global marking stack. Reference objects
    // only get discovered once so it is OK to not
    // de-populate the discovered reference lists. We could have,
    // but the only benefit would be that, when marking restarts,
    // fewer reference objects are discovered.
    return;
  }

  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    if (G1Log::finer()) {
      gclog_or_tty->put(' ');
    }
    GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
    // JNI references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (e.g.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);

    // Parallel processing task executor.
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);

    // Set the concurrency level. The phase was already set prior to
    // executing the remark task.
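    // (As with the proxy tasks above, the concurrency level tells the
    // termination protocol and overflow handling in CMTask::do_marking_step()
    // how many workers to wait for.)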
    set_concurrency(active_workers);

    // Set the degree of MT processing here. If the discovery was done MT,
    // the number of threads involved during discovery could differ from
    // the number of active workers. This is OK as long as the discovered
    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    // Process the weak references.
    const ReferenceProcessorStats& stats =
        rp->process_discovered_references(&g1_is_alive,
                                          &g1_keep_alive,
                                          &g1_drain_mark_stack,
                                          executor,
                                          g1h->gc_timer_cm(),
                                          concurrent_gc_id());
    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");

    if (_markStack.overflow()) {
      // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    assert(rp->num_q() == active_workers, "why not");

    rp->enqueue_discovered_references(executor);

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive if the marking stack overflowed
    return;
  }

  assert(_markStack.isEmpty(), "Marking should have completed");

  // Unload Klasses, String, Symbols, Code Cache, etc.
  {
    G1RemarkGCTraceTime trace("Unloading", G1Log::finer());

    if (ClassUnloadingWithConcurrentMark) {
      bool purged_classes;

      {
        G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
        purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
      }

      {
        G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
        weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
      }
    }

    if (G1StringDedup::is_enabled()) {
      G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
      G1StringDedup::unlink(&g1_is_alive);
    }
  }
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

class CMObjectClosure;

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
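// Each object drained from a SATB buffer is handed to
// CMTask::deal_with_reference(), i.e. it is treated like a reference
// found during normal marking and goes through the task's local queue
// machinery.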
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

class G1RemarkThreadsClosure : public ThreadClosure {
  CMObjectClosure _cm_obj;
  G1CMOopClosure _cm_cl;
  MarkingCodeBlobClosure _code_cl;
  int _thread_parity;
  bool _is_par;

public:
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
    _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
    _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}

  void do_thread(Thread* thread) {
    if (thread->is_Java_thread()) {
      if (thread->claim_oops_do(_is_par, _thread_parity)) {
        JavaThread* jt = (JavaThread*)thread;

        // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking,
        // however oops reachable from nmethods have very complex lifecycles:
        // * Alive if on the stack of an executing method
        // * Weakly reachable otherwise
        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
        // live by the SATB invariant but other oops recorded in nmethods may behave differently.
        jt->nmethods_do(&_code_cl);

        jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
      }
    } else if (thread->is_VM_thread()) {
      if (thread->claim_oops_do(_is_par, _thread_parity)) {
        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
      }
    }
  }
};

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  bool            _is_serial;
public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      {
        ResourceMark rm;
        HandleMark hm;

        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
        Threads::threads_do(&threads_f);
      }

      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true         /* do_termination       */,
                              _is_serial);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
    AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_concurrency_and_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    uint active_workers = 1;
    set_concurrency_and_phase(active_workers, false /* concurrent */);

    // Note - if there's no work gang then the VMThread will be
    // the thread to execute the remark - serially. We have
    // to pass true for the is_serial parameter so that
    // CMTask::do_marking_step() doesn't enter the sync
    // barriers in the event of an overflow. Doing so will
    // cause an assert that the current thread is not a
    // concurrent GC thread.
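    // (The parallel branch above passes false for is_serial and instead
    // relies on the termination protocol to synchronize the workers.)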
    CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            err_msg("Invariant: has_overflown = %s, num buffers = %d",
                    BOOL_TO_STR(has_overflown()),
                    satb_mq_set.completed_buffers_num()));

  print_stats();
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption  vo,
                           bool          all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
      bool marked = _g1h->is_marked(obj, _vo);

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p2i(p), p2i((void*) obj), str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;
  HeapRegion*      _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all,
                              HeapRegion*   hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
    bool marked = _g1h->is_marked(o, _vo);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     p2i((void *)o), (over_tams) ? " >" : (marked) ?
" M" : ""); 2838 PrintReachableOopClosure oopCl(_out, _vo, _all); 2839 o->oop_iterate_no_header(&oopCl); 2840 } 2841 } 2842 }; 2843 2844 class PrintReachableRegionClosure : public HeapRegionClosure { 2845 private: 2846 G1CollectedHeap* _g1h; 2847 outputStream* _out; 2848 VerifyOption _vo; 2849 bool _all; 2850 2851 public: 2852 bool doHeapRegion(HeapRegion* hr) { 2853 HeapWord* b = hr->bottom(); 2854 HeapWord* e = hr->end(); 2855 HeapWord* t = hr->top(); 2856 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2857 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2858 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p)); 2859 _out->cr(); 2860 2861 HeapWord* from = b; 2862 HeapWord* to = t; 2863 2864 if (to > from) { 2865 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to)); 2866 _out->cr(); 2867 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2868 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2869 _out->cr(); 2870 } 2871 2872 return false; 2873 } 2874 2875 PrintReachableRegionClosure(outputStream* out, 2876 VerifyOption vo, 2877 bool all) : 2878 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2879 }; 2880 2881 void ConcurrentMark::print_reachable(const char* str, 2882 VerifyOption vo, 2883 bool all) { 2884 gclog_or_tty->cr(); 2885 gclog_or_tty->print_cr("== Doing heap dump... "); 2886 2887 if (G1PrintReachableBaseFile == NULL) { 2888 gclog_or_tty->print_cr(" #### error: no base file defined"); 2889 return; 2890 } 2891 2892 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2893 (JVM_MAXPATHLEN - 1)) { 2894 gclog_or_tty->print_cr(" #### error: file name too long"); 2895 return; 2896 } 2897 2898 char file_name[JVM_MAXPATHLEN]; 2899 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2900 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2901 2902 fileStream fout(file_name); 2903 if (!fout.is_open()) { 2904 gclog_or_tty->print_cr(" #### error: could not open file"); 2905 return; 2906 } 2907 2908 outputStream* out = &fout; 2909 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2910 out->cr(); 2911 2912 out->print_cr("--- ITERATING OVER REGIONS"); 2913 out->cr(); 2914 PrintReachableRegionClosure rcl(out, vo, all); 2915 _g1h->heap_region_iterate(&rcl); 2916 out->cr(); 2917 2918 gclog_or_tty->print_cr(" done"); 2919 gclog_or_tty->flush(); 2920 } 2921 2922 #endif // PRODUCT 2923 2924 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2925 // Note we are overriding the read-only view of the prev map here, via 2926 // the cast. 2927 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2928 } 2929 2930 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2931 _nextMarkBitMap->clearRange(mr); 2932 } 2933 2934 HeapRegion* 2935 ConcurrentMark::claim_region(uint worker_id) { 2936 // "checkpoint" the finger 2937 HeapWord* finger = _finger; 2938 2939 // _heap_end will not change underneath our feet; it only changes at 2940 // yield points. 2941 while (finger < _heap_end) { 2942 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2943 2944 // Note on how this code handles humongous regions. In the 2945 // normal case the finger will reach the start of a "starts 2946 // humongous" (SH) region. Its end will either be the end of the 2947 // last "continues humongous" (CH) region in the sequence, or the 2948 // standard end of the SH region (if the SH is the only region in 2949 // the sequence). That way claim_region() will skip over the CH 2950 // regions. 
    // However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);

    // The above heap_region_containing_raw() may return NULL as we always
    // scan and claim until the end of the heap. In this case, just jump to
    // the next region.
    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger && curr_region != NULL) {
      // we succeeded
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit  = curr_region->next_top_at_mark_start();

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
                               "["PTR_FORMAT", "PTR_FORMAT"), "
                               "limit = "PTR_FORMAT,
                               worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
      }

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] we were successful with region = "
                               PTR_FORMAT, worker_id, p2i(curr_region));
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
                                 "returning it ", worker_id, p2i(curr_region));
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
                                 "returning NULL", worker_id, p2i(curr_region));
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        if (curr_region == NULL) {
          gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
                                 "global finger = "PTR_FORMAT", "
                                 "our finger = "PTR_FORMAT,
                                 worker_id, p2i(_finger), p2i(finger));
        } else {
          gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
                                 "global finger = "PTR_FORMAT", "
                                 "our finger = "PTR_FORMAT,
                                 worker_id, p2i(_finger), p2i(finger));
        }
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
enum VerifyNoCSetOopsPhase {
  VerifyNoCSetOopsStack,
  VerifyNoCSetOopsQueues,
  VerifyNoCSetOopsSATBCompleted,
  VerifyNoCSetOopsSATBThread
};

class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  VerifyNoCSetOopsPhase _phase;
  int _info;

  const char* phase_str() {
    switch (_phase) {
    case VerifyNoCSetOopsStack:         return "Stack";
    case VerifyNoCSetOopsQueues:        return "Queue";
    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
    default:                            ShouldNotReachHere();
    }
    return NULL;
  }

  void do_object_work(oop obj) {
    guarantee(!_g1h->obj_in_cs(obj),
              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
                      p2i((void*) obj), phase_str(), _info));
  }

public:
  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }

  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
    _phase = phase;
    _info  = info;
  }

  virtual void do_oop(oop* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    do_object_work(obj);
  }

  virtual void do_oop(narrowOop* p) {
    // We should not come across narrow oops while scanning marking
    // stacks and SATB buffers.
    ShouldNotReachHere();
  }

  virtual void do_object(oop obj) {
    do_object_work(obj);
  }
};

void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
                                         bool verify_enqueued_buffers,
                                         bool verify_thread_buffers,
                                         bool verify_fingers) {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->mark_in_progress()) {
    return;
  }

  VerifyNoCSetOopsClosure cl;

  if (verify_stacks) {
    // Verify entries on the global mark stack
    cl.set_phase(VerifyNoCSetOopsStack);
    _markStack.oops_do(&cl);

    // Verify entries on the task queues
    for (uint i = 0; i < _max_worker_id; i += 1) {
      cl.set_phase(VerifyNoCSetOopsQueues, i);
      CMTaskQueue* queue = _task_queues->queue(i);
      queue->oops_do(&cl);
    }
  }

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

  // Verify entries on the enqueued SATB buffers
  if (verify_enqueued_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
    satb_qs.iterate_completed_buffers_read_only(&cl);
  }

  // Verify entries on the per-thread SATB buffers
  if (verify_thread_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBThread);
    satb_qs.iterate_thread_buffers_read_only(&cl);
  }

  if (verify_fingers) {
    // Verify the global finger
    HeapWord* global_finger = finger();
    if (global_finger != NULL && global_finger < _heap_end) {
      // The global finger always points to a heap region boundary. We
      // use heap_region_containing_raw() to get the containing region
      // given that the global finger could be pointing to a free region
      // which subsequently becomes a continues humongous region. If that
      // happens, heap_region_containing() will return the bottom of the
      // corresponding starts humongous region and the check below will
      // not hold any more.
      // Since we always iterate over all regions, we might get a NULL HeapRegion
      // here.
      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
      guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
                        p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
    }

    // Verify the task fingers
    assert(parallel_marking_threads() <= _max_worker_id, "sanity");
    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
      CMTask* task = _tasks[i];
      HeapWord* task_finger = task->finger();
      if (task_finger != NULL && task_finger < _heap_end) {
        // See above note on the global finger verification.
        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
        guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
                  !task_hr->in_collection_set(),
                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
                          p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
      }
    }
  }
}
#endif // PRODUCT

// Aggregate the counting data that was constructed concurrently
// with marking.
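//
// Conceptually (exposition only), for each region the aggregation computes
//
//   marked_bytes(region) = sum over workers w of marked_bytes_array(w)[region]
//
// and ORs every worker's private card bitmap, over the cards the region
// spans, into the single global card bitmap used by cleanup.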
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have their bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could actually be just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrm_index = hr->hrm_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrm_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
3249 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3250 3251 while (scan_idx < limit_idx) { 3252 assert(task_card_bm->at(scan_idx) == true, "should be"); 3253 _cm_card_bm->set_bit(scan_idx); 3254 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3255 3256 // BitMap::get_next_one_offset() can handle the case when 3257 // its left_offset parameter is greater than its right_offset 3258 // parameter. It does, however, have an early exit if 3259 // left_offset == right_offset. So let's limit the value 3260 // passed in for left offset here. 3261 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 3262 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 3263 } 3264 } 3265 3266 // Update the marked bytes for this region. 3267 hr->add_to_marked_bytes(marked_bytes); 3268 3269 // Next heap region 3270 return false; 3271 } 3272 }; 3273 3274 class G1AggregateCountDataTask: public AbstractGangTask { 3275 protected: 3276 G1CollectedHeap* _g1h; 3277 ConcurrentMark* _cm; 3278 BitMap* _cm_card_bm; 3279 uint _max_worker_id; 3280 int _active_workers; 3281 3282 public: 3283 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3284 ConcurrentMark* cm, 3285 BitMap* cm_card_bm, 3286 uint max_worker_id, 3287 int n_workers) : 3288 AbstractGangTask("Count Aggregation"), 3289 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3290 _max_worker_id(max_worker_id), 3291 _active_workers(n_workers) { } 3292 3293 void work(uint worker_id) { 3294 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3295 3296 if (G1CollectedHeap::use_parallel_gc_threads()) { 3297 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3298 _active_workers, 3299 HeapRegion::AggregateCountClaimValue); 3300 } else { 3301 _g1h->heap_region_iterate(&cl); 3302 } 3303 } 3304 }; 3305 3306 3307 void ConcurrentMark::aggregate_count_data() { 3308 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3309 _g1h->workers()->active_workers() : 3310 1); 3311 3312 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3313 _max_worker_id, n_workers); 3314 3315 if (G1CollectedHeap::use_parallel_gc_threads()) { 3316 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3317 "sanity check"); 3318 _g1h->set_par_threads(n_workers); 3319 _g1h->workers()->run_task(&g1_par_agg_task); 3320 _g1h->set_par_threads(0); 3321 3322 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3323 "sanity check"); 3324 _g1h->reset_heap_region_claim_values(); 3325 } else { 3326 g1_par_agg_task.work(0); 3327 } 3328 } 3329 3330 // Clear the per-worker arrays used to store the per-region counting data 3331 void ConcurrentMark::clear_all_count_data() { 3332 // Clear the global card bitmap - it will be filled during 3333 // liveness count aggregation (during remark) and the 3334 // final counting task. 3335 _card_bm.clear(); 3336 3337 // Clear the global region bitmap - it will be filled as part 3338 // of the final counting task. 
3339   _region_bm.clear();
3340
3341   uint max_regions = _g1h->max_regions();
3342   assert(_max_worker_id > 0, "uninitialized");
3343
3344   for (uint i = 0; i < _max_worker_id; i += 1) {
3345     BitMap* task_card_bm = count_card_bitmap_for(i);
3346     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3347
3348     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3349     assert(marked_bytes_array != NULL, "uninitialized");
3350
3351     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3352     task_card_bm->clear();
3353   }
3354 }
3355
3356 void ConcurrentMark::print_stats() {
3357   if (verbose_stats()) {
3358     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3359     for (size_t i = 0; i < _active_tasks; ++i) {
3360       _tasks[i]->print_stats();
3361       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3362     }
3363   }
3364 }
3365
3366 // abandon current marking iteration due to a Full GC
3367 void ConcurrentMark::abort() {
3368   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3369   // concurrent bitmap clearing.
3370   _nextMarkBitMap->clearAll();
3371
3372   // Note we cannot clear the previous marking bitmap here
3373   // since VerifyDuringGC verifies the objects marked during
3374   // a full GC against the previous bitmap.
3375
3376   // Clear the liveness counting data
3377   clear_all_count_data();
3378   // Empty mark stack
3379   reset_marking_state();
3380   for (uint i = 0; i < _max_worker_id; ++i) {
3381     _tasks[i]->clear_region_fields();
3382   }
3383   _first_overflow_barrier_sync.abort();
3384   _second_overflow_barrier_sync.abort();
3385   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3386   if (!gc_id.is_undefined()) {
3387     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3388     // to detect that it was aborted. Only keep track of the id of the first GC that we aborted.
3389     _aborted_gc_id = gc_id;
3390   }
3391   _has_aborted = true;
3392
3393   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3394   satb_mq_set.abandon_partial_marking();
3395   // This can be called either during or outside marking; we'll read
3396   // the expected_active value from the SATB queue set.
3397   satb_mq_set.set_active_all_threads(
3398                                  false, /* new active value */
3399                                  satb_mq_set.is_active() /* expected_active */);
3400
3401   _g1h->trace_heap_after_concurrent_cycle();
3402   _g1h->register_concurrent_cycle_end();
3403 }
3404
3405 const GCId& ConcurrentMark::concurrent_gc_id() {
3406   if (has_aborted()) {
3407     return _aborted_gc_id;
3408   }
3409   return _g1h->gc_tracer_cm()->gc_id();
3410 }
3411
3412 static void print_ms_time_info(const char* prefix, const char* name,
3413                                NumberSeq& ns) {
3414   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3415                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3416   if (ns.num() > 0) {
3417     gclog_or_tty->print_cr("%s [std.
dev = %8.2f ms, max = %8.2f ms]", 3418 prefix, ns.sd(), ns.maximum()); 3419 } 3420 } 3421 3422 void ConcurrentMark::print_summary_info() { 3423 gclog_or_tty->print_cr(" Concurrent marking:"); 3424 print_ms_time_info(" ", "init marks", _init_times); 3425 print_ms_time_info(" ", "remarks", _remark_times); 3426 { 3427 print_ms_time_info(" ", "final marks", _remark_mark_times); 3428 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3429 3430 } 3431 print_ms_time_info(" ", "cleanups", _cleanup_times); 3432 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3433 _total_counting_time, 3434 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3435 (double)_cleanup_times.num() 3436 : 0.0)); 3437 if (G1ScrubRemSets) { 3438 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3439 _total_rs_scrub_time, 3440 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3441 (double)_cleanup_times.num() 3442 : 0.0)); 3443 } 3444 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3445 (_init_times.sum() + _remark_times.sum() + 3446 _cleanup_times.sum())/1000.0); 3447 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3448 "(%8.2f s marking).", 3449 cmThread()->vtime_accum(), 3450 cmThread()->vtime_mark_accum()); 3451 } 3452 3453 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3454 if (use_parallel_marking_threads()) { 3455 _parallel_workers->print_worker_threads_on(st); 3456 } 3457 } 3458 3459 void ConcurrentMark::print_on_error(outputStream* st) const { 3460 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3461 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3462 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3463 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3464 } 3465 3466 // We take a break if someone is trying to stop the world. 
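// A hypothetical caller in a concurrent loop would look roughly like this
// (illustrative only; the helper names below are made up):
//
//   while (has_more_marking_work()) {   // hypothetical helper
//     do_some_marking_work();           // hypothetical helper
//     cm->do_yield_check(worker_id);    // may block in yield()
//   }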
3467 bool ConcurrentMark::do_yield_check(uint worker_id) { 3468 if (SuspendibleThreadSet::should_yield()) { 3469 if (worker_id == 0) { 3470 _g1h->g1_policy()->record_concurrent_pause(); 3471 } 3472 SuspendibleThreadSet::yield(); 3473 return true; 3474 } else { 3475 return false; 3476 } 3477 } 3478 3479 #ifndef PRODUCT 3480 // for debugging purposes 3481 void ConcurrentMark::print_finger() { 3482 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3483 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3484 for (uint i = 0; i < _max_worker_id; ++i) { 3485 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3486 } 3487 gclog_or_tty->cr(); 3488 } 3489 #endif 3490 3491 void CMTask::scan_object(oop obj) { 3492 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3493 3494 if (_cm->verbose_high()) { 3495 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3496 _worker_id, p2i((void*) obj)); 3497 } 3498 3499 size_t obj_size = obj->size(); 3500 _words_scanned += obj_size; 3501 3502 obj->oop_iterate(_cm_oop_closure); 3503 statsOnly( ++_objs_scanned ); 3504 check_limits(); 3505 } 3506 3507 // Closure for iteration over bitmaps 3508 class CMBitMapClosure : public BitMapClosure { 3509 private: 3510 // the bitmap that is being iterated over 3511 CMBitMap* _nextMarkBitMap; 3512 ConcurrentMark* _cm; 3513 CMTask* _task; 3514 3515 public: 3516 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3517 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3518 3519 bool do_bit(size_t offset) { 3520 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3521 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3522 assert( addr < _cm->finger(), "invariant"); 3523 3524 statsOnly( _task->increase_objs_found_on_bitmap() ); 3525 assert(addr >= _task->finger(), "invariant"); 3526 3527 // We move that task's local finger along. 
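    // (Advancing the finger before scanning means that if this step aborts
    // part-way, a restarted do_marking_step() resumes at this object instead
    // of rescanning everything before it; see the finger discussion in
    // do_marking_step() below.)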
3528 _task->move_finger_to(addr); 3529 3530 _task->scan_object(oop(addr)); 3531 // we only partially drain the local queue and global stack 3532 _task->drain_local_queue(true); 3533 _task->drain_global_stack(true); 3534 3535 // if the has_aborted flag has been raised, we need to bail out of 3536 // the iteration 3537 return !_task->has_aborted(); 3538 } 3539 }; 3540 3541 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3542 ConcurrentMark* cm, 3543 CMTask* task) 3544 : _g1h(g1h), _cm(cm), _task(task) { 3545 assert(_ref_processor == NULL, "should be initialized to NULL"); 3546 3547 if (G1UseConcMarkReferenceProcessing) { 3548 _ref_processor = g1h->ref_processor_cm(); 3549 assert(_ref_processor != NULL, "should not be NULL"); 3550 } 3551 } 3552 3553 void CMTask::setup_for_region(HeapRegion* hr) { 3554 assert(hr != NULL, 3555 "claim_region() should have filtered out NULL regions"); 3556 assert(!hr->continuesHumongous(), 3557 "claim_region() should have filtered out continues humongous regions"); 3558 3559 if (_cm->verbose_low()) { 3560 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT, 3561 _worker_id, p2i(hr)); 3562 } 3563 3564 _curr_region = hr; 3565 _finger = hr->bottom(); 3566 update_region_limit(); 3567 } 3568 3569 void CMTask::update_region_limit() { 3570 HeapRegion* hr = _curr_region; 3571 HeapWord* bottom = hr->bottom(); 3572 HeapWord* limit = hr->next_top_at_mark_start(); 3573 3574 if (limit == bottom) { 3575 if (_cm->verbose_low()) { 3576 gclog_or_tty->print_cr("[%u] found an empty region " 3577 "["PTR_FORMAT", "PTR_FORMAT")", 3578 _worker_id, p2i(bottom), p2i(limit)); 3579 } 3580 // The region was collected underneath our feet. 3581 // We set the finger to bottom to ensure that the bitmap 3582 // iteration that will follow this will not do anything. 3583 // (this is not a condition that holds when we set the region up, 3584 // as the region is not supposed to be empty in the first place) 3585 _finger = bottom; 3586 } else if (limit >= _region_limit) { 3587 assert(limit >= _finger, "peace of mind"); 3588 } else { 3589 assert(limit < _region_limit, "only way to get here"); 3590 // This can happen under some pretty unusual circumstances. An 3591 // evacuation pause empties the region underneath our feet (NTAMS 3592 // at bottom). We then do some allocation in the region (NTAMS 3593 // stays at bottom), followed by the region being used as a GC 3594 // alloc region (NTAMS will move to top() and the objects 3595 // originally below it will be grayed). All objects now marked in 3596 // the region are explicitly grayed, if below the global finger, 3597 // and we do not need in fact to scan anything else. So, we simply 3598 // set _finger to be limit to ensure that the bitmap iteration 3599 // doesn't do anything. 3600 _finger = limit; 3601 } 3602 3603 _region_limit = limit; 3604 } 3605 3606 void CMTask::giveup_current_region() { 3607 assert(_curr_region != NULL, "invariant"); 3608 if (_cm->verbose_low()) { 3609 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3610 _worker_id, p2i(_curr_region)); 3611 } 3612 clear_region_fields(); 3613 } 3614 3615 void CMTask::clear_region_fields() { 3616 // Values for these three fields that indicate that we're not 3617 // holding on to a region. 
3618 _curr_region = NULL; 3619 _finger = NULL; 3620 _region_limit = NULL; 3621 } 3622 3623 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3624 if (cm_oop_closure == NULL) { 3625 assert(_cm_oop_closure != NULL, "invariant"); 3626 } else { 3627 assert(_cm_oop_closure == NULL, "invariant"); 3628 } 3629 _cm_oop_closure = cm_oop_closure; 3630 } 3631 3632 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3633 guarantee(nextMarkBitMap != NULL, "invariant"); 3634 3635 if (_cm->verbose_low()) { 3636 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3637 } 3638 3639 _nextMarkBitMap = nextMarkBitMap; 3640 clear_region_fields(); 3641 3642 _calls = 0; 3643 _elapsed_time_ms = 0.0; 3644 _termination_time_ms = 0.0; 3645 _termination_start_time_ms = 0.0; 3646 3647 #if _MARKING_STATS_ 3648 _local_pushes = 0; 3649 _local_pops = 0; 3650 _local_max_size = 0; 3651 _objs_scanned = 0; 3652 _global_pushes = 0; 3653 _global_pops = 0; 3654 _global_max_size = 0; 3655 _global_transfers_to = 0; 3656 _global_transfers_from = 0; 3657 _regions_claimed = 0; 3658 _objs_found_on_bitmap = 0; 3659 _satb_buffers_processed = 0; 3660 _steal_attempts = 0; 3661 _steals = 0; 3662 _aborted = 0; 3663 _aborted_overflow = 0; 3664 _aborted_cm_aborted = 0; 3665 _aborted_yield = 0; 3666 _aborted_timed_out = 0; 3667 _aborted_satb = 0; 3668 _aborted_termination = 0; 3669 #endif // _MARKING_STATS_ 3670 } 3671 3672 bool CMTask::should_exit_termination() { 3673 regular_clock_call(); 3674 // This is called when we are in the termination protocol. We should 3675 // quit if, for some reason, this task wants to abort or the global 3676 // stack is not empty (this means that we can get work from it). 3677 return !_cm->mark_stack_empty() || has_aborted(); 3678 } 3679 3680 void CMTask::reached_limit() { 3681 assert(_words_scanned >= _words_scanned_limit || 3682 _refs_reached >= _refs_reached_limit , 3683 "shouldn't have been called otherwise"); 3684 regular_clock_call(); 3685 } 3686 3687 void CMTask::regular_clock_call() { 3688 if (has_aborted()) return; 3689 3690 // First, we need to recalculate the words scanned and refs reached 3691 // limits for the next clock call. 3692 recalculate_limits(); 3693 3694 // During the regular clock call we do the following 3695 3696 // (1) If an overflow has been flagged, then we abort. 3697 if (_cm->has_overflown()) { 3698 set_has_aborted(); 3699 return; 3700 } 3701 3702 // If we are not concurrent (i.e. we're doing remark) we don't need 3703 // to check anything else. The other steps are only needed during 3704 // the concurrent marking phase. 3705 if (!concurrent()) return; 3706 3707 // (2) If marking has been aborted for Full GC, then we also abort. 3708 if (_cm->has_aborted()) { 3709 set_has_aborted(); 3710 statsOnly( ++_aborted_cm_aborted ); 3711 return; 3712 } 3713 3714 double curr_time_ms = os::elapsedVTime() * 1000.0; 3715 3716 // (3) If marking stats are enabled, then we update the step history. 
3717 #if _MARKING_STATS_
3718   if (_words_scanned >= _words_scanned_limit) {
3719     ++_clock_due_to_scanning;
3720   }
3721   if (_refs_reached >= _refs_reached_limit) {
3722     ++_clock_due_to_marking;
3723   }
3724
3725   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3726   _interval_start_time_ms = curr_time_ms;
3727   _all_clock_intervals_ms.add(last_interval_ms);
3728
3729   if (_cm->verbose_medium()) {
3730     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3731                            "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3732                            _worker_id, last_interval_ms,
3733                            _words_scanned,
3734                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3735                            _refs_reached,
3736                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3737   }
3738 #endif // _MARKING_STATS_
3739
3740   // (4) We check whether we should yield. If we have to, then we abort.
3741   if (SuspendibleThreadSet::should_yield()) {
3742     // We should yield. To do this we abort the task. The caller is
3743     // responsible for yielding.
3744     set_has_aborted();
3745     statsOnly( ++_aborted_yield );
3746     return;
3747   }
3748
3749   // (5) We check whether we've reached our time quota. If we have,
3750   // then we abort.
3751   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3752   if (elapsed_time_ms > _time_target_ms) {
3753     set_has_aborted();
3754     _has_timed_out = true;
3755     statsOnly( ++_aborted_timed_out );
3756     return;
3757   }
3758
3759   // (6) Finally, we check whether there are enough completed SATB
3760   // buffers available for processing. If there are, we abort.
3761   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3762   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3763     if (_cm->verbose_low()) {
3764       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3765                              _worker_id);
3766     }
3767     // we do need to process SATB buffers, so we'll abort and restart
3768     // the marking task to do so
3769     set_has_aborted();
3770     statsOnly( ++_aborted_satb );
3771     return;
3772   }
3773 }
3774
3775 void CMTask::recalculate_limits() {
3776   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3777   _words_scanned_limit = _real_words_scanned_limit;
3778
3779   _real_refs_reached_limit = _refs_reached + refs_reached_period;
3780   _refs_reached_limit = _real_refs_reached_limit;
3781 }
3782
3783 void CMTask::decrease_limits() {
3784   // This is called when we believe that we're going to do an infrequent
3785   // operation which will increase the per-byte scanned cost (i.e. move
3786   // entries to/from the global stack). It basically tries to decrease the
3787   // scanning limit so that the clock is called earlier.
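  // Concretely: recalculate_limits() sets _words_scanned_limit to
  // _words_scanned + words_scanned_period. Subtracting 3/4 of the period
  // below pulls the limit back to roughly
  // _words_scanned + words_scanned_period / 4, so the next
  // regular_clock_call() fires about four times sooner.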
3788 3789 if (_cm->verbose_medium()) { 3790 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id); 3791 } 3792 3793 _words_scanned_limit = _real_words_scanned_limit - 3794 3 * words_scanned_period / 4; 3795 _refs_reached_limit = _real_refs_reached_limit - 3796 3 * refs_reached_period / 4; 3797 } 3798 3799 void CMTask::move_entries_to_global_stack() { 3800 // local array where we'll store the entries that will be popped 3801 // from the local queue 3802 oop buffer[global_stack_transfer_size]; 3803 3804 int n = 0; 3805 oop obj; 3806 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3807 buffer[n] = obj; 3808 ++n; 3809 } 3810 3811 if (n > 0) { 3812 // we popped at least one entry from the local queue 3813 3814 statsOnly( ++_global_transfers_to; _local_pops += n ); 3815 3816 if (!_cm->mark_stack_push(buffer, n)) { 3817 if (_cm->verbose_low()) { 3818 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow", 3819 _worker_id); 3820 } 3821 set_has_aborted(); 3822 } else { 3823 // the transfer was successful 3824 3825 if (_cm->verbose_medium()) { 3826 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack", 3827 _worker_id, n); 3828 } 3829 statsOnly( int tmp_size = _cm->mark_stack_size(); 3830 if (tmp_size > _global_max_size) { 3831 _global_max_size = tmp_size; 3832 } 3833 _global_pushes += n ); 3834 } 3835 } 3836 3837 // this operation was quite expensive, so decrease the limits 3838 decrease_limits(); 3839 } 3840 3841 void CMTask::get_entries_from_global_stack() { 3842 // local array where we'll store the entries that will be popped 3843 // from the global stack. 3844 oop buffer[global_stack_transfer_size]; 3845 int n; 3846 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3847 assert(n <= global_stack_transfer_size, 3848 "we should not pop more than the given limit"); 3849 if (n > 0) { 3850 // yes, we did actually pop at least one entry 3851 3852 statsOnly( ++_global_transfers_from; _global_pops += n ); 3853 if (_cm->verbose_medium()) { 3854 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack", 3855 _worker_id, n); 3856 } 3857 for (int i = 0; i < n; ++i) { 3858 bool success = _task_queue->push(buffer[i]); 3859 // We only call this when the local queue is empty or under a 3860 // given target limit. So, we do not expect this push to fail. 3861 assert(success, "invariant"); 3862 } 3863 3864 statsOnly( int tmp_size = _task_queue->size(); 3865 if (tmp_size > _local_max_size) { 3866 _local_max_size = tmp_size; 3867 } 3868 _local_pushes += n ); 3869 } 3870 3871 // this operation was quite expensive, so decrease the limits 3872 decrease_limits(); 3873 } 3874 3875 void CMTask::drain_local_queue(bool partially) { 3876 if (has_aborted()) return; 3877 3878 // Decide what the target size is, depending whether we're going to 3879 // drain it partially (so that other tasks can steal if they run out 3880 // of things to do) or totally (at the very end). 
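  // E.g. with hypothetical values max_elems() == 16384 and
  // GCDrainStackTargetSize == 24, a partial drain stops once the queue
  // is down to MIN2(16384 / 3, 24) == 24 entries; a total drain empties it.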
3881   size_t target_size;
3882   if (partially) {
3883     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3884   } else {
3885     target_size = 0;
3886   }
3887
3888   if (_task_queue->size() > target_size) {
3889     if (_cm->verbose_high()) {
3890       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3891                              _worker_id, target_size);
3892     }
3893
3894     oop obj;
3895     bool ret = _task_queue->pop_local(obj);
3896     while (ret) {
3897       statsOnly( ++_local_pops );
3898
3899       if (_cm->verbose_high()) {
3900         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3901                                p2i((void*) obj));
3902       }
3903
3904       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3905       assert(!_g1h->is_on_master_free_list(
3906                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3907
3908       scan_object(obj);
3909
3910       if (_task_queue->size() <= target_size || has_aborted()) {
3911         ret = false;
3912       } else {
3913         ret = _task_queue->pop_local(obj);
3914       }
3915     }
3916
3917     if (_cm->verbose_high()) {
3918       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3919                              _worker_id, _task_queue->size());
3920     }
3921   }
3922 }
3923
3924 void CMTask::drain_global_stack(bool partially) {
3925   if (has_aborted()) return;
3926
3927   // We have a policy to drain the local queue before we attempt to
3928   // drain the global stack.
3929   assert(partially || _task_queue->size() == 0, "invariant");
3930
3931   // Decide what the target size is, depending whether we're going to
3932   // drain it partially (so that other tasks can steal if they run out
3933   // of things to do) or totally (at the very end). Notice that,
3934   // because we move entries from the global stack in chunks or
3935   // because another task might be doing the same, we might in fact
3936   // drop below the target. But, this is not a problem.
3937   size_t target_size;
3938   if (partially) {
3939     target_size = _cm->partial_mark_stack_size_target();
3940   } else {
3941     target_size = 0;
3942   }
3943
3944   if (_cm->mark_stack_size() > target_size) {
3945     if (_cm->verbose_low()) {
3946       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3947                              _worker_id, target_size);
3948     }
3949
3950     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3951       get_entries_from_global_stack();
3952       drain_local_queue(partially);
3953     }
3954
3955     if (_cm->verbose_low()) {
3956       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3957                              _worker_id, _cm->mark_stack_size());
3958     }
3959   }
3960 }
3961
3962 // The SATB queue code makes several assumptions about whether to call the
3963 // par or non-par versions of the methods. This is why some of the code is
3964 // replicated. We should really get rid of the single-threaded version
3965 // of the code to simplify things.
3966 void CMTask::drain_satb_buffers() {
3967   if (has_aborted()) return;
3968
3969   // We set this so that the regular clock knows that we're in the
3970   // middle of draining buffers and doesn't set the abort flag when it
3971   // notices that SATB buffers are available for draining. It'd be
3972   // very counterproductive if it did that.
:-) 3973 _draining_satb_buffers = true; 3974 3975 CMObjectClosure oc(this); 3976 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3977 if (G1CollectedHeap::use_parallel_gc_threads()) { 3978 satb_mq_set.set_par_closure(_worker_id, &oc); 3979 } else { 3980 satb_mq_set.set_closure(&oc); 3981 } 3982 3983 // This keeps claiming and applying the closure to completed buffers 3984 // until we run out of buffers or we need to abort. 3985 if (G1CollectedHeap::use_parallel_gc_threads()) { 3986 while (!has_aborted() && 3987 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3988 if (_cm->verbose_medium()) { 3989 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3990 } 3991 statsOnly( ++_satb_buffers_processed ); 3992 regular_clock_call(); 3993 } 3994 } else { 3995 while (!has_aborted() && 3996 satb_mq_set.apply_closure_to_completed_buffer()) { 3997 if (_cm->verbose_medium()) { 3998 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3999 } 4000 statsOnly( ++_satb_buffers_processed ); 4001 regular_clock_call(); 4002 } 4003 } 4004 4005 _draining_satb_buffers = false; 4006 4007 assert(has_aborted() || 4008 concurrent() || 4009 satb_mq_set.completed_buffers_num() == 0, "invariant"); 4010 4011 if (G1CollectedHeap::use_parallel_gc_threads()) { 4012 satb_mq_set.set_par_closure(_worker_id, NULL); 4013 } else { 4014 satb_mq_set.set_closure(NULL); 4015 } 4016 4017 // again, this was a potentially expensive operation, decrease the 4018 // limits to get the regular clock call early 4019 decrease_limits(); 4020 } 4021 4022 void CMTask::print_stats() { 4023 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 4024 _worker_id, _calls); 4025 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 4026 _elapsed_time_ms, _termination_time_ms); 4027 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4028 _step_times_ms.num(), _step_times_ms.avg(), 4029 _step_times_ms.sd()); 4030 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4031 _step_times_ms.maximum(), _step_times_ms.sum()); 4032 4033 #if _MARKING_STATS_ 4034 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4035 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 4036 _all_clock_intervals_ms.sd()); 4037 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4038 _all_clock_intervals_ms.maximum(), 4039 _all_clock_intervals_ms.sum()); 4040 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 4041 _clock_due_to_scanning, _clock_due_to_marking); 4042 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 4043 _objs_scanned, _objs_found_on_bitmap); 4044 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 4045 _local_pushes, _local_pops, _local_max_size); 4046 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 4047 _global_pushes, _global_pops, _global_max_size); 4048 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 4049 _global_transfers_to,_global_transfers_from); 4050 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 4051 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 4052 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 4053 _steal_attempts, _steals); 4054 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 4055 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 
4056                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4057   gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4058                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4059 #endif // _MARKING_STATS_
4060 }
4061
4062 /*****************************************************************************
4063
4064  The do_marking_step(time_target_ms, ...) method is the building
4065  block of the parallel marking framework. It can be called in parallel
4066  with other invocations of do_marking_step() on different tasks
4067  (but only one per task, obviously) and concurrently with the
4068  mutator threads, or during remark, hence it eliminates the need
4069  for two versions of the code. When called during remark, it will
4070  pick up from where the task left off during the concurrent marking
4071  phase. Interestingly, tasks are also claimable during evacuation
4072  pauses, since do_marking_step() ensures that it aborts before
4073  it needs to yield.
4074
4075  The data structures that it uses to do marking work are the
4076  following:
4077
4078  (1) Marking Bitmap. If there are gray objects that appear only
4079  on the bitmap (this happens either when dealing with an overflow
4080  or when the initial marking phase has simply marked the roots
4081  and didn't push them on the stack), then tasks claim heap
4082  regions whose bitmap they then scan to find gray objects. A
4083  global finger indicates where the end of the last claimed region
4084  is. A local finger indicates how far into the region a task has
4085  scanned. The two fingers are used to determine how to gray an
4086  object (i.e. whether simply marking it is OK, as it will be
4087  visited by a task in the future, or whether it also needs to be
4088  pushed on a stack).
4089
4090  (2) Local Queue. The local queue of the task, which the task can
4091  access reasonably efficiently. Other tasks can steal from
4092  it when they run out of work. Throughout the marking phase, a
4093  task attempts to keep its local queue short but not totally
4094  empty, so that entries are available for stealing by other
4095  tasks. Only when there is no more work does a task totally
4096  drain its local queue.
4097
4098  (3) Global Mark Stack. This handles local queue overflow. During
4099  marking only sets of entries are moved between it and the local
4100  queues, as access to it requires a mutex and more fine-grained
4101  interaction with it, which might cause contention. If it
4102  overflows, then the marking phase should restart and iterate
4103  over the bitmap to identify gray objects. Throughout the marking
4104  phase, tasks attempt to keep the global mark stack at a small
4105  length but not totally empty, so that entries are available for
4106  popping by other tasks. Only when there is no more work do tasks
4107  totally drain the global mark stack.
4108
4109  (4) SATB Buffer Queue. This is where completed SATB buffers are
4110  made available. Buffers are regularly removed from this queue
4111  and scanned for roots, so that the queue doesn't get too
4112  long. During remark, all completed buffers are processed, as
4113  well as the filled-in parts of any uncompleted buffers.
4114
4115  The do_marking_step() method tries to abort when the time target
4116  has been reached. There are a few other cases when the
4117  do_marking_step() method also aborts:
4118
4119  (1) When the marking phase has been aborted (after a Full GC).
4120
4121  (2) When a global overflow (on the global stack) has been
4122  triggered. Before the task aborts, it will actually sync up with
4123  the other tasks to ensure that all the marking data structures
4124  (local queues, stacks, fingers etc.) are re-initialized so that
4125  when do_marking_step() completes, the marking phase can
4126  immediately restart.
4127
4128  (3) When enough completed SATB buffers are available. The
4129  do_marking_step() method only tries to drain SATB buffers right
4130  at the beginning. So, if enough buffers are available, the
4131  marking step aborts and the SATB buffers are processed at
4132  the beginning of the next invocation.
4133
4134  (4) To yield. When we have to yield then we abort and yield
4135  right at the end of do_marking_step(). This saves us from a lot
4136  of hassle as, by yielding, we might allow a Full GC. If this
4137  happens then objects will be compacted underneath our feet, the
4138  heap might shrink, etc. We save checking for this by just
4139  aborting and doing the yield right at the end.
4140
4141  From the above it follows that the do_marking_step() method should
4142  be called in a loop (or, otherwise, regularly) until it completes.
4143
4144  If a marking step completes without its has_aborted() flag being
4145  true, it means it has completed the current marking phase (and
4146  also all other marking tasks have done so and have all synced up).
4147
4148  A method called regular_clock_call() is invoked "regularly" (in
4149  sub-ms intervals) throughout marking. It is this clock method that
4150  checks all the abort conditions which were mentioned above and
4151  decides when the task should abort. A work-based scheme is used to
4152  trigger this clock method: when the number of object words the
4153  marking phase has scanned or the number of references the marking
4154  phase has visited reach a given limit. Additional invocations of
4155  the clock method have been planted in a few other strategic places
4156  too. The initial reason for the clock method was to avoid calling
4157  os::elapsedVTime() too regularly, as it is quite expensive. So, once
4158  it was in place, it was natural to piggy-back all the other
4159  conditions on it too and not constantly check them throughout the code.
4160
4161  If do_termination is true then do_marking_step will enter its
4162  termination protocol.
4163
4164  The value of is_serial must be true when do_marking_step is being
4165  called serially (i.e. by the VMThread) and do_marking_step should
4166  skip any synchronization in the termination and overflow code.
4167  Examples include the serial remark code and the serial reference
4168  processing closures.
4169
4170  The value of is_serial must be false when do_marking_step is
4171  being called by any of the worker threads in a work gang.
4172  Examples include the concurrent marking code (CMMarkingTask),
4173  the MT remark code, and the MT reference processing closures.
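
 A minimal illustrative driver of the above contract (a sketch, not code
 from this file):

   do {
     task->do_marking_step(target_ms,
                           true,    // do_termination
                           false);  // is_serial
     // on abort: yield, or sync up on an overflow, then call again
   } while (task->has_aborted() && !cm->has_aborted());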
4174
4175 *****************************************************************************/
4176
4177 void CMTask::do_marking_step(double time_target_ms,
4178                              bool do_termination,
4179                              bool is_serial) {
4180   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4181   assert(concurrent() == _cm->concurrent(), "they should be the same");
4182
4183   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4184   assert(_task_queues != NULL, "invariant");
4185   assert(_task_queue != NULL, "invariant");
4186   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4187
4188   assert(!_claimed,
4189          "only one thread should claim this task at any one time");
4190
4191   // OK, this doesn't safeguard against all possible scenarios, as it is
4192   // possible for two threads to set the _claimed flag at the same
4193   // time. But it is only for debugging purposes anyway and it will
4194   // catch most problems.
4195   _claimed = true;
4196
4197   _start_time_ms = os::elapsedVTime() * 1000.0;
4198   statsOnly( _interval_start_time_ms = _start_time_ms );
4199
4200   // If do_stealing is true then do_marking_step will attempt to
4201   // steal work from the other CMTasks. It only makes sense to
4202   // enable stealing when the termination protocol is enabled
4203   // and do_marking_step() is not being called serially.
4204   bool do_stealing = do_termination && !is_serial;
4205
4206   double diff_prediction_ms =
4207     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4208   _time_target_ms = time_target_ms - diff_prediction_ms;
4209
4210   // set up the variables that are used in the work-based scheme to
4211   // call the regular clock method
4212   _words_scanned = 0;
4213   _refs_reached = 0;
4214   recalculate_limits();
4215
4216   // clear all flags
4217   clear_has_aborted();
4218   _has_timed_out = false;
4219   _draining_satb_buffers = false;
4220
4221   ++_calls;
4222
4223   if (_cm->verbose_low()) {
4224     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4225                            "target = %1.2lfms >>>>>>>>>>",
4226                            _worker_id, _calls, _time_target_ms);
4227   }
4228
4229   // Set up the bitmap and oop closures. Anything that uses them is
4230   // eventually called from this method, so it is OK to allocate these
4231   // statically.
4232   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4233   G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4234   set_cm_oop_closure(&cm_oop_closure);
4235
4236   if (_cm->has_overflown()) {
4237     // This can happen if the mark stack overflows during a GC pause
4238     // and this task, after a yield point, restarts. We have to abort
4239     // as we need to get into the overflow protocol which happens
4240     // right at the end of this task.
4241     set_has_aborted();
4242   }
4243
4244   // First drain any available SATB buffers. After this, we will not
4245   // look at SATB buffers before the next invocation of this method.
4246   // If enough completed SATB buffers are queued up, the regular clock
4247   // will abort this task so that it restarts.
4248   drain_satb_buffers();
4249   // ...then partially drain the local queue and the global stack
4250   drain_local_queue(true);
4251   drain_global_stack(true);
4252
4253   do {
4254     if (!has_aborted() && _curr_region != NULL) {
4255       // This means that we're already holding on to a region.
4256       assert(_finger != NULL, "if region is not NULL, then the finger "
4257              "should not be NULL either");
4258
4259       // We might have restarted this task after an evacuation pause
4260       // which might have evacuated the region we're holding on to
4261       // underneath our feet.
Let's read its limit again to make sure 4262 // that we do not iterate over a region of the heap that 4263 // contains garbage (update_region_limit() will also move 4264 // _finger to the start of the region if it is found empty). 4265 update_region_limit(); 4266 // We will start from _finger not from the start of the region, 4267 // as we might be restarting this task after aborting half-way 4268 // through scanning this region. In this case, _finger points to 4269 // the address where we last found a marked object. If this is a 4270 // fresh region, _finger points to start(). 4271 MemRegion mr = MemRegion(_finger, _region_limit); 4272 4273 if (_cm->verbose_low()) { 4274 gclog_or_tty->print_cr("[%u] we're scanning part " 4275 "["PTR_FORMAT", "PTR_FORMAT") " 4276 "of region "HR_FORMAT, 4277 _worker_id, p2i(_finger), p2i(_region_limit), 4278 HR_FORMAT_PARAMS(_curr_region)); 4279 } 4280 4281 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4282 "humongous regions should go around loop once only"); 4283 4284 // Some special cases: 4285 // If the memory region is empty, we can just give up the region. 4286 // If the current region is humongous then we only need to check 4287 // the bitmap for the bit associated with the start of the object, 4288 // scan the object if it's live, and give up the region. 4289 // Otherwise, let's iterate over the bitmap of the part of the region 4290 // that is left. 4291 // If the iteration is successful, give up the region. 4292 if (mr.is_empty()) { 4293 giveup_current_region(); 4294 regular_clock_call(); 4295 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4296 if (_nextMarkBitMap->isMarked(mr.start())) { 4297 // The object is marked - apply the closure 4298 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4299 bitmap_closure.do_bit(offset); 4300 } 4301 // Even if this task aborted while scanning the humongous object 4302 // we can (and should) give up the current region. 4303 giveup_current_region(); 4304 regular_clock_call(); 4305 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4306 giveup_current_region(); 4307 regular_clock_call(); 4308 } else { 4309 assert(has_aborted(), "currently the only way to do so"); 4310 // The only way to abort the bitmap iteration is to return 4311 // false from the do_bit() method. However, inside the 4312 // do_bit() method we move the _finger to point to the 4313 // object currently being looked at. So, if we bail out, we 4314 // have definitely set _finger to something non-null. 4315 assert(_finger != NULL, "invariant"); 4316 4317 // Region iteration was actually aborted. So now _finger 4318 // points to the address of the object we last scanned. If we 4319 // leave it there, when we restart this task, we will rescan 4320 // the object. It is easy to avoid this. We move the finger by 4321 // enough to point to the next possible object header (the 4322 // bitmap knows by how much we need to move it as it knows its 4323 // granularity). 4324 assert(_finger < _region_limit, "invariant"); 4325 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4326 // Check if bitmap iteration was aborted while scanning the last object 4327 if (new_finger >= _region_limit) { 4328 giveup_current_region(); 4329 } else { 4330 move_finger_to(new_finger); 4331 } 4332 } 4333 } 4334 // At this point we have either completed iterating over the 4335 // region we were holding on to, or we have aborted. 
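    // (Invariant at this point: has_aborted() || _curr_region == NULL.)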
4336 4337 // We then partially drain the local queue and the global stack. 4338 // (Do we really need this?) 4339 drain_local_queue(true); 4340 drain_global_stack(true); 4341 4342 // Read the note on the claim_region() method on why it might 4343 // return NULL with potentially more regions available for 4344 // claiming and why we have to check out_of_regions() to determine 4345 // whether we're done or not. 4346 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4347 // We are going to try to claim a new region. We should have 4348 // given up on the previous one. 4349 // Separated the asserts so that we know which one fires. 4350 assert(_curr_region == NULL, "invariant"); 4351 assert(_finger == NULL, "invariant"); 4352 assert(_region_limit == NULL, "invariant"); 4353 if (_cm->verbose_low()) { 4354 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4355 } 4356 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4357 if (claimed_region != NULL) { 4358 // Yes, we managed to claim one 4359 statsOnly( ++_regions_claimed ); 4360 4361 if (_cm->verbose_low()) { 4362 gclog_or_tty->print_cr("[%u] we successfully claimed " 4363 "region "PTR_FORMAT, 4364 _worker_id, p2i(claimed_region)); 4365 } 4366 4367 setup_for_region(claimed_region); 4368 assert(_curr_region == claimed_region, "invariant"); 4369 } 4370 // It is important to call the regular clock here. It might take 4371 // a while to claim a region if, for example, we hit a large 4372 // block of empty regions. So we need to call the regular clock 4373 // method once round the loop to make sure it's called 4374 // frequently enough. 4375 regular_clock_call(); 4376 } 4377 4378 if (!has_aborted() && _curr_region == NULL) { 4379 assert(_cm->out_of_regions(), 4380 "at this point we should be out of regions"); 4381 } 4382 } while ( _curr_region != NULL && !has_aborted()); 4383 4384 if (!has_aborted()) { 4385 // We cannot check whether the global stack is empty, since other 4386 // tasks might be pushing objects to it concurrently. 4387 assert(_cm->out_of_regions(), 4388 "at this point we should be out of regions"); 4389 4390 if (_cm->verbose_low()) { 4391 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4392 } 4393 4394 // Try to reduce the number of available SATB buffers so that 4395 // remark has less work to do. 4396 drain_satb_buffers(); 4397 } 4398 4399 // Since we've done everything else, we can now totally drain the 4400 // local queue and global stack. 4401 drain_local_queue(false); 4402 drain_global_stack(false); 4403 4404 // Attempt at work stealing from other task's queues. 4405 if (do_stealing && !has_aborted()) { 4406 // We have not aborted. This means that we have finished all that 4407 // we could. Let's try to do some stealing... 4408 4409 // We cannot check whether the global stack is empty, since other 4410 // tasks might be pushing objects to it concurrently. 
4411     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4412            "only way to reach here");
4413
4414     if (_cm->verbose_low()) {
4415       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4416     }
4417
4418     while (!has_aborted()) {
4419       oop obj;
4420       statsOnly( ++_steal_attempts );
4421
4422       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4423         if (_cm->verbose_medium()) {
4424           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4425                                  _worker_id, p2i((void*) obj));
4426         }
4427
4428         statsOnly( ++_steals );
4429
4430         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4431                "any stolen object should be marked");
4432         scan_object(obj);
4433
4434         // And since we're towards the end, let's totally drain the
4435         // local queue and global stack.
4436         drain_local_queue(false);
4437         drain_global_stack(false);
4438       } else {
4439         break;
4440       }
4441     }
4442   }
4443
4444   // If we are about to wrap up and go into termination, check if we
4445   // should raise the overflow flag.
4446   if (do_termination && !has_aborted()) {
4447     if (_cm->force_overflow()->should_force()) {
4448       _cm->set_has_overflown();
4449       regular_clock_call();
4450     }
4451   }
4452
4453   // We still haven't aborted. Now, let's try to get into the
4454   // termination protocol.
4455   if (do_termination && !has_aborted()) {
4456     // We cannot check whether the global stack is empty, since other
4457     // tasks might be concurrently pushing objects on it.
4458     // Separated the asserts so that we know which one fires.
4459     assert(_cm->out_of_regions(), "only way to reach here");
4460     assert(_task_queue->size() == 0, "only way to reach here");
4461
4462     if (_cm->verbose_low()) {
4463       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4464     }
4465
4466     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4467
4468     // The CMTask class also extends the TerminatorTerminator class,
4469     // hence its should_exit_termination() method will also decide
4470     // whether to exit the termination protocol or not.
4471     bool finished = (is_serial ||
4472                      _cm->terminator()->offer_termination(this));
4473     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4474     _termination_time_ms +=
4475       termination_end_time_ms - _termination_start_time_ms;
4476
4477     if (finished) {
4478       // We're all done.
4479
4480       if (_worker_id == 0) {
4481         // let's allow task 0 to do this
4482         if (concurrent()) {
4483           assert(_cm->concurrent_marking_in_progress(), "invariant");
4484           // we need to set this to false before the next
4485           // safepoint. This way we ensure that the marking phase
4486           // doesn't observe any more heap expansions.
4487           _cm->clear_concurrent_marking_in_progress();
4488         }
4489       }
4490
4491       // We can now guarantee that the global stack is empty, since
4492       // all other tasks have finished. We separated the guarantees so
4493       // that, if a condition is false, we can immediately find out
4494       // which one.
4495       guarantee(_cm->out_of_regions(), "only way to reach here");
4496       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4497       guarantee(_task_queue->size() == 0, "only way to reach here");
4498       guarantee(!_cm->has_overflown(), "only way to reach here");
4499       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4500
4501       if (_cm->verbose_low()) {
4502         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4503       }
4504     } else {
4505       // Apparently there's more work to do. Let's abort this task. The
4506       // caller will restart it and we can hopefully find more things to do.
4507
4508       if (_cm->verbose_low()) {
4509         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4510                                _worker_id);
4511       }
4512
4513       set_has_aborted();
4514       statsOnly( ++_aborted_termination );
4515     }
4516   }
4517
4518   // Mainly for debugging purposes to make sure that a pointer to the
4519   // closure which was statically allocated in this frame doesn't
4520   // escape it by accident.
4521   set_cm_oop_closure(NULL);
4522   double end_time_ms = os::elapsedVTime() * 1000.0;
4523   double elapsed_time_ms = end_time_ms - _start_time_ms;
4524   // Update the step history.
4525   _step_times_ms.add(elapsed_time_ms);
4526
4527   if (has_aborted()) {
4528     // The task was aborted for some reason.
4529
4530     statsOnly( ++_aborted );
4531
4532     if (_has_timed_out) {
4533       double diff_ms = elapsed_time_ms - _time_target_ms;
4534       // Keep statistics of how well we did with respect to hitting
4535       // our target only if we actually timed out (if we aborted for
4536       // other reasons, then the results might get skewed).
4537       _marking_step_diffs_ms.add(diff_ms);
4538     }
4539
4540     if (_cm->has_overflown()) {
4541       // This is the interesting one. We aborted because a global
4542       // overflow was raised. This means we have to restart the
4543       // marking phase and start iterating over regions. However, in
4544       // order to do this we have to make sure that all tasks stop
4545       // what they are doing and re-initialize in a safe manner. We
4546       // will achieve this with the use of two barrier sync points.
4547
4548       if (_cm->verbose_low()) {
4549         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4550       }
4551
4552       if (!is_serial) {
4553         // We only need to enter the sync barrier if being called
4554         // from a parallel context
4555         _cm->enter_first_sync_barrier(_worker_id);
4556
4557         // When we exit this sync barrier we know that all tasks have
4558         // stopped doing marking work. So, it's now safe to
4559         // re-initialize our data structures. At the end of this method,
4560         // task 0 will clear the global data structures.
4561       }
4562
4563       statsOnly( ++_aborted_overflow );
4564
4565       // We clear the local state of this task...
4566       clear_region_fields();
4567
4568       if (!is_serial) {
4569         // ...and enter the second barrier.
4570         _cm->enter_second_sync_barrier(_worker_id);
4571       }
4572       // At this point, if we're in the concurrent phase of
4573       // marking, everything has been re-initialized and we're
4574       // ready to restart.
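      // The two-barrier protocol above, in outline:
      //   barrier 1: every task has stopped marking -> safe to reset state
      //   (each task clears its region fields here)
      //   barrier 2: every task has reset its state -> safe to restart marking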
4575 } 4576 4577 if (_cm->verbose_low()) { 4578 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4579 "elapsed = %1.2lfms <<<<<<<<<<", 4580 _worker_id, _time_target_ms, elapsed_time_ms); 4581 if (_cm->has_aborted()) { 4582 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4583 _worker_id); 4584 } 4585 } 4586 } else { 4587 if (_cm->verbose_low()) { 4588 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4589 "elapsed = %1.2lfms <<<<<<<<<<", 4590 _worker_id, _time_target_ms, elapsed_time_ms); 4591 } 4592 } 4593 4594 _claimed = false; 4595 } 4596 4597 CMTask::CMTask(uint worker_id, 4598 ConcurrentMark* cm, 4599 size_t* marked_bytes, 4600 BitMap* card_bm, 4601 CMTaskQueue* task_queue, 4602 CMTaskQueueSet* task_queues) 4603 : _g1h(G1CollectedHeap::heap()), 4604 _worker_id(worker_id), _cm(cm), 4605 _claimed(false), 4606 _nextMarkBitMap(NULL), _hash_seed(17), 4607 _task_queue(task_queue), 4608 _task_queues(task_queues), 4609 _cm_oop_closure(NULL), 4610 _marked_bytes_array(marked_bytes), 4611 _card_bm(card_bm) { 4612 guarantee(task_queue != NULL, "invariant"); 4613 guarantee(task_queues != NULL, "invariant"); 4614 4615 statsOnly( _clock_due_to_scanning = 0; 4616 _clock_due_to_marking = 0 ); 4617 4618 _marking_step_diffs_ms.add(0.5); 4619 } 4620 4621 // These are formatting macros that are used below to ensure 4622 // consistent formatting. The *_H_* versions are used to format the 4623 // header for a particular value and they should be kept consistent 4624 // with the corresponding macro. Also note that most of the macros add 4625 // the necessary white space (as a prefix) which makes them a bit 4626 // easier to compose. 4627 4628 // All the output lines are prefixed with this string to be able to 4629 // identify them easily in a large log file. 4630 #define G1PPRL_LINE_PREFIX "###" 4631 4632 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4633 #ifdef _LP64 4634 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4635 #else // _LP64 4636 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4637 #endif // _LP64 4638 4639 // For per-region info 4640 #define G1PPRL_TYPE_FORMAT " %-4s" 4641 #define G1PPRL_TYPE_H_FORMAT " %4s" 4642 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4643 #define G1PPRL_BYTE_H_FORMAT " %9s" 4644 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4645 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4646 4647 // For summary info 4648 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4649 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4650 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4651 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4652 4653 G1PrintRegionLivenessInfoClosure:: 4654 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4655 : _out(out), 4656 _total_used_bytes(0), _total_capacity_bytes(0), 4657 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4658 _hum_used_bytes(0), _hum_capacity_bytes(0), 4659 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4660 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4661 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4662 MemRegion g1_reserved = g1h->g1_reserved(); 4663 double now = os::elapsedTime(); 4664 4665 // Print the header of the output. 
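  // Roughly (with hypothetical values), the header printed below looks like:
  //   ### PHASE Post-Marking @ 1.234
  //   ### HEAP reserved: 0x...-0x... region-size: 1048576
  //   ###
  //   ### type address-range used prev-live next-live gc-eff remset code-roots
  //   ###                  (bytes)   (bytes)   (bytes) (bytes/ms) (bytes) (bytes)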
4666   _out->cr();
4667   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4668   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4669                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4670                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4671                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4672                  HeapRegion::GrainBytes);
4673   _out->print_cr(G1PPRL_LINE_PREFIX);
4674   _out->print_cr(G1PPRL_LINE_PREFIX
4675                  G1PPRL_TYPE_H_FORMAT
4676                  G1PPRL_ADDR_BASE_H_FORMAT
4677                  G1PPRL_BYTE_H_FORMAT
4678                  G1PPRL_BYTE_H_FORMAT
4679                  G1PPRL_BYTE_H_FORMAT
4680                  G1PPRL_DOUBLE_H_FORMAT
4681                  G1PPRL_BYTE_H_FORMAT
4682                  G1PPRL_BYTE_H_FORMAT,
4683                  "type", "address-range",
4684                  "used", "prev-live", "next-live", "gc-eff",
4685                  "remset", "code-roots");
4686   _out->print_cr(G1PPRL_LINE_PREFIX
4687                  G1PPRL_TYPE_H_FORMAT
4688                  G1PPRL_ADDR_BASE_H_FORMAT
4689                  G1PPRL_BYTE_H_FORMAT
4690                  G1PPRL_BYTE_H_FORMAT
4691                  G1PPRL_BYTE_H_FORMAT
4692                  G1PPRL_DOUBLE_H_FORMAT
4693                  G1PPRL_BYTE_H_FORMAT
4694                  G1PPRL_BYTE_H_FORMAT,
4695                  "", "",
4696                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4697                  "(bytes)", "(bytes)");
4698 }
4699
4700 // It takes as a parameter a reference to one of the _hum_* fields, deduces
4701 // the corresponding value for a region in a humongous region
4702 // series (either the region size, or what's left if the _hum_* field
4703 // is < the region size), and updates the _hum_* field accordingly.
4704 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4705   size_t bytes = 0;
4706   // The > 0 check is to deal with the prev and next live bytes which
4707   // could be 0.
4708   if (*hum_bytes > 0) {
4709     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4710     *hum_bytes -= bytes;
4711   }
4712   return bytes;
4713 }
4714
4715 // It deduces the values for a region in a humongous region series
4716 // from the _hum_* fields and updates those accordingly. It assumes
4717 // that the _hum_* fields have already been set up from the "starts
4718 // humongous" region and we visit the regions in address order.
4719 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4720                                                      size_t* capacity_bytes,
4721                                                      size_t* prev_live_bytes,
4722                                                      size_t* next_live_bytes) {
4723   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4724   *used_bytes = get_hum_bytes(&_hum_used_bytes);
4725   *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4726   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4727   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4728 }
4729
4730 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4731   const char* type = "";
4732   HeapWord* bottom = r->bottom();
4733   HeapWord* end = r->end();
4734   size_t capacity_bytes = r->capacity();
4735   size_t used_bytes = r->used();
4736   size_t prev_live_bytes = r->live_bytes();
4737   size_t next_live_bytes = r->next_live_bytes();
4738   double gc_eff = r->gc_efficiency();
4739   size_t remset_bytes = r->rem_set()->mem_size();
4740   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4741
4742   if (r->used() == 0) {
4743     type = "FREE";
4744   } else if (r->is_survivor()) {
4745     type = "SURV";
4746   } else if (r->is_young()) {
4747     type = "EDEN";
4748   } else if (r->startsHumongous()) {
4749     type = "HUMS";
4750
4751     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4752            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4753            "they should have been zeroed after the last time we used them");
4754     // Set up the _hum_* fields.
4755 _hum_capacity_bytes = capacity_bytes; 4756 _hum_used_bytes = used_bytes; 4757 _hum_prev_live_bytes = prev_live_bytes; 4758 _hum_next_live_bytes = next_live_bytes; 4759 get_hum_bytes(&used_bytes, &capacity_bytes, 4760 &prev_live_bytes, &next_live_bytes); 4761 end = bottom + HeapRegion::GrainWords; 4762 } else if (r->continuesHumongous()) { 4763 type = "HUMC"; 4764 get_hum_bytes(&used_bytes, &capacity_bytes, 4765 &prev_live_bytes, &next_live_bytes); 4766 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4767 } else { 4768 type = "OLD"; 4769 } 4770 4771 _total_used_bytes += used_bytes; 4772 _total_capacity_bytes += capacity_bytes; 4773 _total_prev_live_bytes += prev_live_bytes; 4774 _total_next_live_bytes += next_live_bytes; 4775 _total_remset_bytes += remset_bytes; 4776 _total_strong_code_roots_bytes += strong_code_roots_bytes; 4777 4778 // Print a line for this particular region. 4779 _out->print_cr(G1PPRL_LINE_PREFIX 4780 G1PPRL_TYPE_FORMAT 4781 G1PPRL_ADDR_BASE_FORMAT 4782 G1PPRL_BYTE_FORMAT 4783 G1PPRL_BYTE_FORMAT 4784 G1PPRL_BYTE_FORMAT 4785 G1PPRL_DOUBLE_FORMAT 4786 G1PPRL_BYTE_FORMAT 4787 G1PPRL_BYTE_FORMAT, 4788 type, p2i(bottom), p2i(end), 4789 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 4790 remset_bytes, strong_code_roots_bytes); 4791 4792 return false; 4793 } 4794 4795 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4796 // add static memory usages to remembered set sizes 4797 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 4798 // Print the footer of the output. 4799 _out->print_cr(G1PPRL_LINE_PREFIX); 4800 _out->print_cr(G1PPRL_LINE_PREFIX 4801 " SUMMARY" 4802 G1PPRL_SUM_MB_FORMAT("capacity") 4803 G1PPRL_SUM_MB_PERC_FORMAT("used") 4804 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4805 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4806 G1PPRL_SUM_MB_FORMAT("remset") 4807 G1PPRL_SUM_MB_FORMAT("code-roots"), 4808 bytes_to_mb(_total_capacity_bytes), 4809 bytes_to_mb(_total_used_bytes), 4810 perc(_total_used_bytes, _total_capacity_bytes), 4811 bytes_to_mb(_total_prev_live_bytes), 4812 perc(_total_prev_live_bytes, _total_capacity_bytes), 4813 bytes_to_mb(_total_next_live_bytes), 4814 perc(_total_next_live_bytes, _total_capacity_bytes), 4815 bytes_to_mb(_total_remset_bytes), 4816 bytes_to_mb(_total_strong_code_roots_bytes)); 4817 _out->cr(); 4818 }