/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/g1/heapRegionSet.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/allocation.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}
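
// Note: one bitmap bit covers (HeapWordSize << _shifter) bytes of heap, so
// the address <-> bit-offset conversions used below are plain shifts by
// _shifter.
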
HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return heap_size / mark_distance();
}

// The number of heap bytes covered by a single byte of the mark bitmap:
// one bit per MinObjAlignmentInBytes-sized chunk of the heap.
size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions) {
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}
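
// The closure below clears a region's part of the bitmap in chunks that each
// correspond to 1M of heap, so that a concurrent caller can yield to a
// pending safepoint between chunks.
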
// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  uint _workload;
  bool _suspendible; // If the task is suspendible, workers must join the STS.
 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _suspendible(suspendible) {
    assert(n_workers > 0, "Must have at least one worker.");
    uint max_regions = G1CollectedHeap::heap()->max_regions();
    // Split the regions evenly among the workers, rounding up.
    _workload = (max_regions + n_workers - 1) / n_workers;
    assert(n_workers * _workload >= max_regions, "Workloads should cover all regions.");
  }

  void work(uint worker_id) {
    if (_suspendible) {
      SuspendibleThreadSet::join();
    }
    uint start = worker_id * _workload;
    uint end = MIN2(start + _workload, G1CollectedHeap::heap()->max_regions());
    G1CollectedHeap::heap()->heap_region_iterate_range(_cl, start, end);
    if (_suspendible) {
      SuspendibleThreadSet::leave();
    }
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  if (g1h->use_parallel_gc_threads()) {
    uint n_workers = g1h->workers()->active_workers();
    ParClearNextMarkBitmapTask task(&cl, n_workers, false);
    g1h->workers()->run_task(&task);
  } else {
    g1h->heap_region_iterate(&cl);
  }
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}
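
// Returns (and clears) the first contiguous run of marked words in
// [addr, end_addr), clipped to that interval. The returned MemRegion is
// empty if no marked word remains in the interval.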
MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity; continue with the current capacity.
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}
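
// Lock-free push: reserve a slot by advancing _index with a CAS (retrying on
// contention) and only then store into the reserved slot.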
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
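
// note_start_of_gc()/note_end_of_gc() bracket each evacuation pause so that
// the guarantee in note_end_of_gc() catches any push onto the mark stack
// that incorrectly happens during the pause.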
void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}
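
// scan_finished() and wait_until_scan_finished() cooperate through
// RootRegionScan_lock: the scanner clears _scan_in_progress and notifies,
// while threads that must not proceed until root region scanning completes
// block on the same lock.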
void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

// Use roughly a quarter of the parallel GC threads, and at least one.
uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
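
  // _markBitMap1 and _markBitMap2 alternate between the roles of "previous"
  // (completed) and "next" (in-progress) marking bitmap; the two roles are
  // swapped at the end of each marking cycle.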

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // If we are not running with any parallel GC threads we will not
    // spawn any marking threads either.
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
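
      // Worked example (illustrative numbers, not defaults): with
      // MaxGCPauseMillis=200, GCPauseIntervalMillis=1000,
      // G1MarkingOverheadPercent=10 and 8 CPUs,
      // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02 of total CPU time,
      // which is 0.16 of one CPU, so marking_thread_num = ceil(0.16) = 1,
      // and that one thread runs at a ~16% duty cycle (sleep_factor = 5.25).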
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL,   "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // We need this to make sure that the flag is on during the evacuation
  // pause with initial mark piggy-backed.
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  if (use_parallel_marking_threads()) {
    ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
    _parallel_workers->run_task(&task);
  } else {
    SuspendibleThreadSetJoiner sts;
    g1h->heap_region_iterate(&cl);
  }

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    return _bitmap->getNextMarkedWordAddress(r->bottom(), r->end()) != r->end();
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
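        // If only this task has aborted (e.g. its time slice elapsed) while
        // marking as a whole continues, sleep in proportion to the virtual
        // time just spent marking so that the target marking overhead is
        // respected.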
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();
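
  // Root regions (currently only the survivor regions of the initial-mark
  // pause) contain objects that are live by definition, so they are scanned
  // up front to mark everything they reference. This scan has to complete
  // before the next evacuation pause is allowed to start.
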
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};
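
// The counting data consists of one bit per region in the region bitmap (set
// when the region contains any live data) and one bit per card in the card
// bitmap (set when the card is spanned by a live object or by the
// allocated-since-marking portion of its region).
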
// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
1647 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1648 1649 bool expected = _exp_region_bm->at(index); 1650 bool actual = _region_bm->at(index); 1651 if (expected && !actual) { 1652 if (_verbose) { 1653 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1654 "expected: %s, actual: %s", 1655 hr->hrs_index(), 1656 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1657 } 1658 failures += 1; 1659 } 1660 1661 // Verify that the card bit maps for the cards spanned by the current 1662 // region match. We have an error if we have a set bit in the expected 1663 // bit map and the corresponding bit in the actual bitmap is not set. 1664 1665 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1666 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1667 1668 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1669 expected = _exp_card_bm->at(i); 1670 actual = _card_bm->at(i); 1671 1672 if (expected && !actual) { 1673 if (_verbose) { 1674 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1675 "expected: %s, actual: %s", 1676 hr->hrs_index(), i, 1677 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1678 } 1679 failures += 1; 1680 } 1681 } 1682 1683 if (failures > 0 && _verbose) { 1684 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1685 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1686 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), 1687 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1688 } 1689 1690 _failures += failures; 1691 1692 // We could stop iteration over the heap when we 1693 // find the first violating region by returning true. 1694 return false; 1695 } 1696 }; 1697 1698 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1699 protected: 1700 G1CollectedHeap* _g1h; 1701 ConcurrentMark* _cm; 1702 BitMap* _actual_region_bm; 1703 BitMap* _actual_card_bm; 1704 1705 uint _n_workers; 1706 1707 BitMap* _expected_region_bm; 1708 BitMap* _expected_card_bm; 1709 1710 int _failures; 1711 bool _verbose; 1712 1713 public: 1714 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1715 BitMap* region_bm, BitMap* card_bm, 1716 BitMap* expected_region_bm, BitMap* expected_card_bm) 1717 : AbstractGangTask("G1 verify final counting"), 1718 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1719 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1720 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1721 _failures(0), _verbose(false), 1722 _n_workers(0) { 1723 assert(VerifyDuringGC, "don't call this otherwise"); 1724 1725 // Use the value already set as the number of active threads 1726 // in the call to run_task(). 
1727 if (G1CollectedHeap::use_parallel_gc_threads()) { 1728 assert( _g1h->workers()->active_workers() > 0, 1729 "Should have been previously set"); 1730 _n_workers = _g1h->workers()->active_workers(); 1731 } else { 1732 _n_workers = 1; 1733 } 1734 1735 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1736 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1737 1738 _verbose = _cm->verbose_medium(); 1739 } 1740 1741 void work(uint worker_id) { 1742 assert(worker_id < _n_workers, "invariant"); 1743 1744 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1745 _actual_region_bm, _actual_card_bm, 1746 _expected_region_bm, 1747 _expected_card_bm, 1748 _verbose); 1749 1750 if (G1CollectedHeap::use_parallel_gc_threads()) { 1751 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1752 worker_id, 1753 _n_workers, 1754 HeapRegion::VerifyCountClaimValue); 1755 } else { 1756 _g1h->heap_region_iterate(&verify_cl); 1757 } 1758 1759 Atomic::add(verify_cl.failures(), &_failures); 1760 } 1761 1762 int failures() const { return _failures; } 1763 }; 1764 1765 // Closure that finalizes the liveness counting data. 1766 // Used during the cleanup pause. 1767 // Sets the bits corresponding to the interval [NTAMS, top] 1768 // (which contains the implicitly live objects) in the 1769 // card liveness bitmap. Also sets the bit for each region, 1770 // containing live data, in the region liveness bitmap. 1771 1772 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1773 public: 1774 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1775 BitMap* region_bm, 1776 BitMap* card_bm) : 1777 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1778 1779 bool doHeapRegion(HeapRegion* hr) { 1780 1781 if (hr->continuesHumongous()) { 1782 // We will ignore these here and process them when their 1783 // associated "starts humongous" region is processed (see 1784 // set_bit_for_heap_region()). Note that we cannot rely on their 1785 // associated "starts humongous" region to have their bit set to 1786 // 1 since, due to the region chunking in the parallel region 1787 // iteration, a "continues humongous" region might be visited 1788 // before its associated "starts humongous". 1789 return false; 1790 } 1791 1792 HeapWord* ntams = hr->next_top_at_mark_start(); 1793 HeapWord* top = hr->top(); 1794 1795 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1796 1797 // Mark the allocated-since-marking portion... 1798 if (ntams < top) { 1799 // This definitely means the region has live objects. 1800 set_bit_for_region(hr); 1801 1802 // Now set the bits in the card bitmap for [ntams, top) 1803 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1804 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1805 1806 // Note: if we're looking at the last region in heap - top 1807 // could be actually just beyond the end of the heap; end_idx 1808 // will then correspond to a (non-existent) card that is also 1809 // just beyond the heap. 
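    // A small worked sketch of the rounding, assuming the usual 512-byte
    // cards: card_bitmap_index_for() effectively computes
    //   idx = (addr - heap_start) >> card_shift;   // rounds *down*
    // so if top falls mid-card, the partially covered card would be lost
    // from the exclusive range [start_idx, end_idx). The increment below
    // widens the range by one card to keep it covered.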
1810 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1811 // end of object is not card aligned - increment to cover 1812 // all the cards spanned by the object 1813 end_idx += 1; 1814 } 1815 1816 assert(end_idx <= _card_bm->size(), 1817 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1818 end_idx, _card_bm->size())); 1819 assert(start_idx < _card_bm->size(), 1820 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1821 start_idx, _card_bm->size())); 1822 1823 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1824 } 1825 1826 // Set the bit for the region if it contains live data 1827 if (hr->next_marked_bytes() > 0) { 1828 set_bit_for_region(hr); 1829 } 1830 1831 return false; 1832 } 1833 }; 1834 1835 class G1ParFinalCountTask: public AbstractGangTask { 1836 protected: 1837 G1CollectedHeap* _g1h; 1838 ConcurrentMark* _cm; 1839 BitMap* _actual_region_bm; 1840 BitMap* _actual_card_bm; 1841 1842 uint _n_workers; 1843 1844 public: 1845 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1846 : AbstractGangTask("G1 final counting"), 1847 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1848 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1849 _n_workers(0) { 1850 // Use the value already set as the number of active threads 1851 // in the call to run_task(). 1852 if (G1CollectedHeap::use_parallel_gc_threads()) { 1853 assert( _g1h->workers()->active_workers() > 0, 1854 "Should have been previously set"); 1855 _n_workers = _g1h->workers()->active_workers(); 1856 } else { 1857 _n_workers = 1; 1858 } 1859 } 1860 1861 void work(uint worker_id) { 1862 assert(worker_id < _n_workers, "invariant"); 1863 1864 FinalCountDataUpdateClosure final_update_cl(_g1h, 1865 _actual_region_bm, 1866 _actual_card_bm); 1867 1868 if (G1CollectedHeap::use_parallel_gc_threads()) { 1869 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1870 worker_id, 1871 _n_workers, 1872 HeapRegion::FinalCountClaimValue); 1873 } else { 1874 _g1h->heap_region_iterate(&final_update_cl); 1875 } 1876 } 1877 }; 1878 1879 class G1ParNoteEndTask; 1880 1881 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1882 G1CollectedHeap* _g1; 1883 size_t _max_live_bytes; 1884 uint _regions_claimed; 1885 size_t _freed_bytes; 1886 FreeRegionList* _local_cleanup_list; 1887 HeapRegionSetCount _old_regions_removed; 1888 HeapRegionSetCount _humongous_regions_removed; 1889 HRRSCleanupTask* _hrrs_cleanup_task; 1890 double _claimed_region_time; 1891 double _max_region_time; 1892 1893 public: 1894 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1895 FreeRegionList* local_cleanup_list, 1896 HRRSCleanupTask* hrrs_cleanup_task) : 1897 _g1(g1), 1898 _max_live_bytes(0), _regions_claimed(0), 1899 _freed_bytes(0), 1900 _claimed_region_time(0.0), _max_region_time(0.0), 1901 _local_cleanup_list(local_cleanup_list), 1902 _old_regions_removed(), 1903 _humongous_regions_removed(), 1904 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1905 1906 size_t freed_bytes() { return _freed_bytes; } 1907 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1908 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1909 1910 bool doHeapRegion(HeapRegion *hr) { 1911 if (hr->continuesHumongous()) { 1912 return false; 1913 } 1914 // We use a claim value of zero here because all regions 1915 // were claimed with value 1 in the FinalCount task. 
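    // (A region is reclaimed outright below only when it is non-empty,
    // completely dead - max_live_bytes() == 0 - and not young; regions
    // that survive merely have their remembered sets scrubbed via
    // do_cleanup_work().)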
1916 _g1->reset_gc_time_stamps(hr); 1917 double start = os::elapsedTime(); 1918 _regions_claimed++; 1919 hr->note_end_of_marking(); 1920 _max_live_bytes += hr->max_live_bytes(); 1921 1922 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1923 _freed_bytes += hr->used(); 1924 hr->set_containing_set(NULL); 1925 if (hr->isHumongous()) { 1926 assert(hr->startsHumongous(), "we should only see starts humongous"); 1927 _humongous_regions_removed.increment(1u, hr->capacity()); 1928 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1929 } else { 1930 _old_regions_removed.increment(1u, hr->capacity()); 1931 _g1->free_region(hr, _local_cleanup_list, true); 1932 } 1933 } else { 1934 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1935 } 1936 1937 double region_time = (os::elapsedTime() - start); 1938 _claimed_region_time += region_time; 1939 if (region_time > _max_region_time) { 1940 _max_region_time = region_time; 1941 } 1942 return false; 1943 } 1944 1945 size_t max_live_bytes() { return _max_live_bytes; } 1946 uint regions_claimed() { return _regions_claimed; } 1947 double claimed_region_time_sec() { return _claimed_region_time; } 1948 double max_region_time_sec() { return _max_region_time; } 1949 }; 1950 1951 class G1ParNoteEndTask: public AbstractGangTask { 1952 friend class G1NoteEndOfConcMarkClosure; 1953 1954 protected: 1955 G1CollectedHeap* _g1h; 1956 size_t _max_live_bytes; 1957 size_t _freed_bytes; 1958 FreeRegionList* _cleanup_list; 1959 1960 public: 1961 G1ParNoteEndTask(G1CollectedHeap* g1h, 1962 FreeRegionList* cleanup_list) : 1963 AbstractGangTask("G1 note end"), _g1h(g1h), 1964 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1965 1966 void work(uint worker_id) { 1967 double start = os::elapsedTime(); 1968 FreeRegionList local_cleanup_list("Local Cleanup List"); 1969 HRRSCleanupTask hrrs_cleanup_task; 1970 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1971 &hrrs_cleanup_task); 1972 if (G1CollectedHeap::use_parallel_gc_threads()) { 1973 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1974 _g1h->workers()->active_workers(), 1975 HeapRegion::NoteEndClaimValue); 1976 } else { 1977 _g1h->heap_region_iterate(&g1_note_end); 1978 } 1979 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1980 1981 // Now update the lists 1982 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1983 { 1984 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1985 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1986 _max_live_bytes += g1_note_end.max_live_bytes(); 1987 _freed_bytes += g1_note_end.freed_bytes(); 1988 1989 // If we iterate over the global cleanup list at the end of 1990 // cleanup to do this printing we will not guarantee to only 1991 // generate output for the newly-reclaimed regions (the list 1992 // might not be empty at the beginning of cleanup; we might 1993 // still be working on its previous contents). So we do the 1994 // printing here, before we append the new regions to the global 1995 // cleanup list. 
1996 1997 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1998 if (hr_printer->is_active()) { 1999 FreeRegionListIterator iter(&local_cleanup_list); 2000 while (iter.more_available()) { 2001 HeapRegion* hr = iter.get_next(); 2002 hr_printer->cleanup(hr); 2003 } 2004 } 2005 2006 _cleanup_list->add_ordered(&local_cleanup_list); 2007 assert(local_cleanup_list.is_empty(), "post-condition"); 2008 2009 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 2010 } 2011 } 2012 size_t max_live_bytes() { return _max_live_bytes; } 2013 size_t freed_bytes() { return _freed_bytes; } 2014 }; 2015 2016 class G1ParScrubRemSetTask: public AbstractGangTask { 2017 protected: 2018 G1RemSet* _g1rs; 2019 BitMap* _region_bm; 2020 BitMap* _card_bm; 2021 public: 2022 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 2023 BitMap* region_bm, BitMap* card_bm) : 2024 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 2025 _region_bm(region_bm), _card_bm(card_bm) { } 2026 2027 void work(uint worker_id) { 2028 if (G1CollectedHeap::use_parallel_gc_threads()) { 2029 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 2030 HeapRegion::ScrubRemSetClaimValue); 2031 } else { 2032 _g1rs->scrub(_region_bm, _card_bm); 2033 } 2034 } 2035 2036 }; 2037 2038 void ConcurrentMark::cleanup() { 2039 // world is stopped at this checkpoint 2040 assert(SafepointSynchronize::is_at_safepoint(), 2041 "world should be stopped"); 2042 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2043 2044 // If a full collection has happened, we shouldn't do this. 2045 if (has_aborted()) { 2046 g1h->set_marking_complete(); // So bitmap clearing isn't confused 2047 return; 2048 } 2049 2050 g1h->verify_region_sets_optional(); 2051 2052 if (VerifyDuringGC) { 2053 HandleMark hm; // handle scope 2054 Universe::heap()->prepare_for_verify(); 2055 Universe::verify(VerifyOption_G1UsePrevMarking, 2056 " VerifyDuringGC:(before)"); 2057 } 2058 g1h->check_bitmaps("Cleanup Start"); 2059 2060 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2061 g1p->record_concurrent_mark_cleanup_start(); 2062 2063 double start = os::elapsedTime(); 2064 2065 HeapRegionRemSet::reset_for_cleanup_tasks(); 2066 2067 uint n_workers; 2068 2069 // Do counting once more with the world stopped for good measure. 2070 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2071 2072 if (G1CollectedHeap::use_parallel_gc_threads()) { 2073 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2074 "sanity check"); 2075 2076 g1h->set_par_threads(); 2077 n_workers = g1h->n_par_threads(); 2078 assert(g1h->n_par_threads() == n_workers, 2079 "Should not have been reset"); 2080 g1h->workers()->run_task(&g1_par_count_task); 2081 // Done with the parallel phase so reset to 0. 2082 g1h->set_par_threads(0); 2083 2084 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2085 "sanity check"); 2086 } else { 2087 n_workers = 1; 2088 g1_par_count_task.work(0); 2089 } 2090 2091 if (VerifyDuringGC) { 2092 // Verify that the counting data accumulated during marking matches 2093 // that calculated by walking the marking bitmap. 
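    // In outline: recount liveness from the final mark bitmap into fresh
    // "expected" bitmaps, then require that every bit present in the
    // expected data is also present in the actual (concurrently
    // accumulated) data.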
2094 2095 // Bitmaps to hold expected values 2096 BitMap expected_region_bm(_region_bm.size(), true); 2097 BitMap expected_card_bm(_card_bm.size(), true); 2098 2099 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2100 &_region_bm, 2101 &_card_bm, 2102 &expected_region_bm, 2103 &expected_card_bm); 2104 2105 if (G1CollectedHeap::use_parallel_gc_threads()) { 2106 g1h->set_par_threads((int)n_workers); 2107 g1h->workers()->run_task(&g1_par_verify_task); 2108 // Done with the parallel phase so reset to 0. 2109 g1h->set_par_threads(0); 2110 2111 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2112 "sanity check"); 2113 } else { 2114 g1_par_verify_task.work(0); 2115 } 2116 2117 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2118 } 2119 2120 size_t start_used_bytes = g1h->used(); 2121 g1h->set_marking_complete(); 2122 2123 double count_end = os::elapsedTime(); 2124 double this_final_counting_time = (count_end - start); 2125 _total_counting_time += this_final_counting_time; 2126 2127 if (G1PrintRegionLivenessInfo) { 2128 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2129 _g1h->heap_region_iterate(&cl); 2130 } 2131 2132 // Install newly created mark bitmap as "prev". 2133 swapMarkBitMaps(); 2134 2135 g1h->reset_gc_time_stamp(); 2136 2137 // Note end of marking in all heap regions. 2138 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2139 if (G1CollectedHeap::use_parallel_gc_threads()) { 2140 g1h->set_par_threads((int)n_workers); 2141 g1h->workers()->run_task(&g1_par_note_end_task); 2142 g1h->set_par_threads(0); 2143 2144 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2145 "sanity check"); 2146 } else { 2147 g1_par_note_end_task.work(0); 2148 } 2149 g1h->check_gc_time_stamps(); 2150 2151 if (!cleanup_list_is_empty()) { 2152 // The cleanup list is not empty, so we'll have to process it 2153 // concurrently. Notify anyone else that might be wanting free 2154 // regions that there will be more free regions coming soon. 2155 g1h->set_free_regions_coming(); 2156 } 2157 2158 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end() 2159 // call below, since it affects the metric by which we sort the heap regions. 2160 if (G1ScrubRemSets) { 2161 double rs_scrub_start = os::elapsedTime(); 2162 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2163 if (G1CollectedHeap::use_parallel_gc_threads()) { 2164 g1h->set_par_threads((int)n_workers); 2165 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2166 g1h->set_par_threads(0); 2167 2168 assert(g1h->check_heap_region_claim_values( 2169 HeapRegion::ScrubRemSetClaimValue), 2170 "sanity check"); 2171 } else { 2172 g1_par_scrub_rs_task.work(0); 2173 } 2174 2175 double rs_scrub_end = os::elapsedTime(); 2176 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2177 _total_rs_scrub_time += this_rs_scrub_time; 2178 } 2179 2180 // this will also free any regions totally full of garbage objects, 2181 // and sort the regions. 2182 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2183 2184 // Statistics. 2185 double end = os::elapsedTime(); 2186 _cleanup_times.add((end - start) * 1000.0); 2187 2188 if (G1Log::fine()) { 2189 g1h->print_size_transition(gclog_or_tty, 2190 start_used_bytes, 2191 g1h->used(), 2192 g1h->capacity()); 2193 } 2194 2195 // Clean up will have freed any regions completely full of garbage. 2196 // Update the soft reference policy with the new heap occupancy.
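  // (The soft reference clearing policy - e.g. LRUCurrentHeapPolicy in
  // referencePolicy.hpp - paces clearing by the amount of free heap,
  // hence the refresh below.)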
2197 Universe::update_heap_info_at_gc(); 2198 2199 if (VerifyDuringGC) { 2200 HandleMark hm; // handle scope 2201 Universe::heap()->prepare_for_verify(); 2202 Universe::verify(VerifyOption_G1UsePrevMarking, 2203 " VerifyDuringGC:(after)"); 2204 } 2205 2206 g1h->check_bitmaps("Cleanup End"); 2207 2208 g1h->verify_region_sets_optional(); 2209 2210 // We need to make this be a "collection" so any collection pause that 2211 // races with it goes around and waits for completeCleanup to finish. 2212 g1h->increment_total_collections(); 2213 2214 // Clean out dead classes and update Metaspace sizes. 2215 if (ClassUnloadingWithConcurrentMark) { 2216 ClassLoaderDataGraph::purge(); 2217 } 2218 MetaspaceGC::compute_new_size(); 2219 2220 // We reclaimed old regions so we should calculate the sizes to make 2221 // sure we update the old gen/space data. 2222 g1h->g1mm()->update_sizes(); 2223 2224 g1h->trace_heap_after_concurrent_cycle(); 2225 } 2226 2227 void ConcurrentMark::completeCleanup() { 2228 if (has_aborted()) return; 2229 2230 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2231 2232 _cleanup_list.verify_optional(); 2233 FreeRegionList tmp_free_list("Tmp Free List"); 2234 2235 if (G1ConcRegionFreeingVerbose) { 2236 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2237 "cleanup list has %u entries", 2238 _cleanup_list.length()); 2239 } 2240 2241 // No one else should be accessing the _cleanup_list at this point, 2242 // so it is not necessary to take any locks 2243 while (!_cleanup_list.is_empty()) { 2244 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 2245 assert(hr != NULL, "Got NULL from a non-empty list"); 2246 hr->par_clear(); 2247 tmp_free_list.add_ordered(hr); 2248 2249 // Instead of adding one region at a time to the secondary_free_list, 2250 // we accumulate them in the local list and move them a few at a 2251 // time. This also cuts down on the number of notify_all() calls 2252 // we do during this process. We'll also append the local list when 2253 // _cleanup_list is empty (which means we just removed the last 2254 // region from the _cleanup_list). 2255 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2256 _cleanup_list.is_empty()) { 2257 if (G1ConcRegionFreeingVerbose) { 2258 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2259 "appending %u entries to the secondary_free_list, " 2260 "cleanup list still has %u entries", 2261 tmp_free_list.length(), 2262 _cleanup_list.length()); 2263 } 2264 2265 { 2266 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2267 g1h->secondary_free_list_add(&tmp_free_list); 2268 SecondaryFreeList_lock->notify_all(); 2269 } 2270 2271 if (G1StressConcRegionFreeing) { 2272 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2273 os::sleep(Thread::current(), (jlong) 1, false); 2274 } 2275 } 2276 } 2277 } 2278 assert(tmp_free_list.is_empty(), "post-condition"); 2279 } 2280 2281 // Supporting Object and Oop closures for reference discovery 2282 // and processing during marking 2283 2284 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2285 HeapWord* addr = (HeapWord*)obj; 2286 return addr != NULL && 2287 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2288 } 2289 2290 // 'Keep Alive' oop closure used by both serial and parallel reference processing. 2291 // Uses the CMTask associated with a worker thread (for serial reference 2292 // processing the CMTask for worker 0 is used) to preserve (mark) and 2293 // trace referent objects.
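// (Instances of this closure and of the drain closure further below are
// passed to ReferenceProcessor::process_discovered_references() in
// weakRefsWork(), later in this file.)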
2294 // 2295 // Using the CMTask and embedded local queues avoids having the worker 2296 // threads operating on the global mark stack. This reduces the risk 2297 // of overflowing the stack - which we would rather avoid at this late 2298 // state. Also using the tasks' local queues removes the potential 2299 // of the workers interfering with each other that could occur if 2300 // operating on the global stack. 2301 2302 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2303 ConcurrentMark* _cm; 2304 CMTask* _task; 2305 int _ref_counter_limit; 2306 int _ref_counter; 2307 bool _is_serial; 2308 public: 2309 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2310 _cm(cm), _task(task), _is_serial(is_serial), 2311 _ref_counter_limit(G1RefProcDrainInterval) { 2312 assert(_ref_counter_limit > 0, "sanity"); 2313 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2314 _ref_counter = _ref_counter_limit; 2315 } 2316 2317 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2318 virtual void do_oop( oop* p) { do_oop_work(p); } 2319 2320 template <class T> void do_oop_work(T* p) { 2321 if (!_cm->has_overflown()) { 2322 oop obj = oopDesc::load_decode_heap_oop(p); 2323 if (_cm->verbose_high()) { 2324 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2325 "*"PTR_FORMAT" = "PTR_FORMAT, 2326 _task->worker_id(), p2i(p), p2i((void*) obj)); 2327 } 2328 2329 _task->deal_with_reference(obj); 2330 _ref_counter--; 2331 2332 if (_ref_counter == 0) { 2333 // We have dealt with _ref_counter_limit references, pushing them 2334 // and objects reachable from them on to the local stack (and 2335 // possibly the global stack). Call CMTask::do_marking_step() to 2336 // process these entries. 2337 // 2338 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2339 // there's nothing more to do (i.e. we're done with the entries that 2340 // were pushed as a result of the CMTask::deal_with_reference() calls 2341 // above) or we overflow. 2342 // 2343 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2344 // flag while there may still be some work to do. (See the comment at 2345 // the beginning of CMTask::do_marking_step() for those conditions - 2346 // one of which is reaching the specified time target.) It is only 2347 // when CMTask::do_marking_step() returns without setting the 2348 // has_aborted() flag that the marking step has completed. 2349 do { 2350 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2351 _task->do_marking_step(mark_step_duration_ms, 2352 false /* do_termination */, 2353 _is_serial); 2354 } while (_task->has_aborted() && !_cm->has_overflown()); 2355 _ref_counter = _ref_counter_limit; 2356 } 2357 } else { 2358 if (_cm->verbose_high()) { 2359 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2360 } 2361 } 2362 } 2363 }; 2364 2365 // 'Drain' oop closure used by both serial and parallel reference processing. 2366 // Uses the CMTask associated with a given worker thread (for serial 2367 // reference processing the CMtask for worker 0 is used). Calls the 2368 // do_marking_step routine, with an unbelievably large timeout value, 2369 // to drain the marking data structures of the remaining entries 2370 // added by the 'keep alive' oop closure above. 
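// (For scale: the "unbelievably large" timeout passed below is
// 1000000000.0 ms, i.e. roughly 11.5 days - in effect, run to
// completion.)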
2371 2372 class G1CMDrainMarkingStackClosure: public VoidClosure { 2373 ConcurrentMark* _cm; 2374 CMTask* _task; 2375 bool _is_serial; 2376 public: 2377 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2378 _cm(cm), _task(task), _is_serial(is_serial) { 2379 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2380 } 2381 2382 void do_void() { 2383 do { 2384 if (_cm->verbose_high()) { 2385 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s", 2386 _task->worker_id(), BOOL_TO_STR(_is_serial)); 2387 } 2388 2389 // We call CMTask::do_marking_step() to completely drain the local 2390 // and global marking stacks of entries pushed by the 'keep alive' 2391 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2392 // 2393 // CMTask::do_marking_step() is called in a loop, which we'll exit 2394 // if there's nothing more to do (i.e. we've completely drained the 2395 // entries that were pushed as a result of applying the 'keep alive' 2396 // closure to the entries on the discovered ref lists) or we overflow 2397 // the global marking stack. 2398 // 2399 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2400 // flag while there may still be some work to do. (See the comment at 2401 // the beginning of CMTask::do_marking_step() for those conditions - 2402 // one of which is reaching the specified time target.) It is only 2403 // when CMTask::do_marking_step() returns without setting the 2404 // has_aborted() flag that the marking step has completed. 2405 2406 _task->do_marking_step(1000000000.0 /* something very large */, 2407 true /* do_termination */, 2408 _is_serial); 2409 } while (_task->has_aborted() && !_cm->has_overflown()); 2410 } 2411 }; 2412 2413 // Implementation of AbstractRefProcTaskExecutor for parallel 2414 // reference processing at the end of G1 concurrent marking 2415 2416 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2417 private: 2418 G1CollectedHeap* _g1h; 2419 ConcurrentMark* _cm; 2420 WorkGang* _workers; 2421 int _active_workers; 2422 2423 public: 2424 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2425 ConcurrentMark* cm, 2426 WorkGang* workers, 2427 int n_workers) : 2428 _g1h(g1h), _cm(cm), 2429 _workers(workers), _active_workers(n_workers) { } 2430 2431 // Executes the given task using concurrent marking worker threads.
2432 virtual void execute(ProcessTask& task); 2433 virtual void execute(EnqueueTask& task); 2434 }; 2435 2436 class G1CMRefProcTaskProxy: public AbstractGangTask { 2437 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2438 ProcessTask& _proc_task; 2439 G1CollectedHeap* _g1h; 2440 ConcurrentMark* _cm; 2441 2442 public: 2443 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2444 G1CollectedHeap* g1h, 2445 ConcurrentMark* cm) : 2446 AbstractGangTask("Process reference objects in parallel"), 2447 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2448 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2449 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2450 } 2451 2452 virtual void work(uint worker_id) { 2453 ResourceMark rm; 2454 HandleMark hm; 2455 CMTask* task = _cm->task(worker_id); 2456 G1CMIsAliveClosure g1_is_alive(_g1h); 2457 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2458 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2459 2460 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2461 } 2462 }; 2463 2464 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2465 assert(_workers != NULL, "Need parallel worker threads."); 2466 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2467 2468 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2469 2470 // We need to reset the concurrency level before each 2471 // proxy task execution, so that the termination protocol 2472 // and overflow handling in CMTask::do_marking_step() knows 2473 // how many workers to wait for. 2474 _cm->set_concurrency(_active_workers); 2475 _g1h->set_par_threads(_active_workers); 2476 _workers->run_task(&proc_task_proxy); 2477 _g1h->set_par_threads(0); 2478 } 2479 2480 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2481 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2482 EnqueueTask& _enq_task; 2483 2484 public: 2485 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2486 AbstractGangTask("Enqueue reference objects in parallel"), 2487 _enq_task(enq_task) { } 2488 2489 virtual void work(uint worker_id) { 2490 _enq_task.work(worker_id); 2491 } 2492 }; 2493 2494 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2495 assert(_workers != NULL, "Need parallel worker threads."); 2496 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2497 2498 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2499 2500 // Not strictly necessary but... 2501 // 2502 // We need to reset the concurrency level before each 2503 // proxy task execution, so that the termination protocol 2504 // and overflow handling in CMTask::do_marking_step() knows 2505 // how many workers to wait for. 2506 _cm->set_concurrency(_active_workers); 2507 _g1h->set_par_threads(_active_workers); 2508 _workers->run_task(&enq_task_proxy); 2509 _g1h->set_par_threads(0); 2510 } 2511 2512 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2513 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2514 } 2515 2516 // Helper class to get rid of some boilerplate code. 
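// (The boilerplate in question: prepending the space that G1's log
// layout expects before a timed phase and wiring in the concurrent-mark
// GC timer and GC id, so that call sites reduce to
// G1RemarkGCTraceTime trace("Phase", doit).)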
2517 class G1RemarkGCTraceTime : public GCTraceTime { 2518 static bool doit_and_prepend(bool doit) { 2519 if (doit) { 2520 gclog_or_tty->put(' '); 2521 } 2522 return doit; 2523 } 2524 2525 public: 2526 G1RemarkGCTraceTime(const char* title, bool doit) 2527 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(), 2528 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) { 2529 } 2530 }; 2531 2532 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2533 if (has_overflown()) { 2534 // Skip processing the discovered references if we have 2535 // overflown the global marking stack. Reference objects 2536 // only get discovered once so it is OK to not 2537 // de-populate the discovered reference lists. We could have, 2538 // but the only benefit would be that, when marking restarts, 2539 // fewer reference objects are discovered. 2540 return; 2541 } 2542 2543 ResourceMark rm; 2544 HandleMark hm; 2545 2546 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2547 2548 // Is alive closure. 2549 G1CMIsAliveClosure g1_is_alive(g1h); 2550 2551 // Inner scope to exclude the cleaning of the string and symbol 2552 // tables from the displayed time. 2553 { 2554 if (G1Log::finer()) { 2555 gclog_or_tty->put(' '); 2556 } 2557 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id()); 2558 2559 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2560 2561 // See the comment in G1CollectedHeap::ref_processing_init() 2562 // about how reference processing currently works in G1. 2563 2564 // Set the soft reference policy 2565 rp->setup_policy(clear_all_soft_refs); 2566 assert(_markStack.isEmpty(), "mark stack should be empty"); 2567 2568 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2569 // in serial reference processing. Note these closures are also 2570 // used for serially processing (by the current thread) the 2571 // JNI references during parallel reference processing. 2572 // 2573 // These closures do not need to synchronize with the worker 2574 // threads involved in parallel reference processing as these 2575 // instances are executed serially by the current thread (e.g. 2576 // reference processing is not multi-threaded and is thus 2577 // performed by the current thread instead of a gang worker). 2578 // 2579 // The gang tasks involved in parallel reference processing create 2580 // their own instances of these closures, which do their own 2581 // synchronization among themselves. 2582 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2583 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2584 2585 // We need at least one active thread. If reference processing 2586 // is not multi-threaded we use the current (VMThread) thread, 2587 // otherwise we use the work gang from the G1CollectedHeap and 2588 // we utilize all the worker threads we can. 2589 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL; 2590 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2591 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2592 2593 // Parallel processing task executor. 2594 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2595 g1h->workers(), active_workers); 2596 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2597 2598 // Set the concurrency level. The phase was already set prior to 2599 // executing the remark task.
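    // (If the concurrency level disagreed with the number of workers
    // that actually run, the termination protocol in
    // CMTask::do_marking_step() could wait on the wrong thread count.)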
2600 set_concurrency(active_workers); 2601 2602 // Set the degree of MT processing here. If the discovery was done MT, 2603 // the number of threads involved during discovery could differ from 2604 // the number of active workers. This is OK as long as the discovered 2605 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2606 rp->set_active_mt_degree(active_workers); 2607 2608 // Process the weak references. 2609 const ReferenceProcessorStats& stats = 2610 rp->process_discovered_references(&g1_is_alive, 2611 &g1_keep_alive, 2612 &g1_drain_mark_stack, 2613 executor, 2614 g1h->gc_timer_cm(), 2615 concurrent_gc_id()); 2616 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2617 2618 // The do_oop work routines of the keep_alive and drain_marking_stack 2619 // oop closures will set the has_overflown flag if we overflow the 2620 // global marking stack. 2621 2622 assert(_markStack.overflow() || _markStack.isEmpty(), 2623 "mark stack should be empty (unless it overflowed)"); 2624 2625 if (_markStack.overflow()) { 2626 // This should have been done already when we tried to push an 2627 // entry on to the global mark stack. But let's do it again. 2628 set_has_overflown(); 2629 } 2630 2631 assert(rp->num_q() == active_workers, "why not"); 2632 2633 rp->enqueue_discovered_references(executor); 2634 2635 rp->verify_no_references_recorded(); 2636 assert(!rp->discovery_enabled(), "Post condition"); 2637 } 2638 2639 if (has_overflown()) { 2640 // We can not trust g1_is_alive if the marking stack overflowed 2641 return; 2642 } 2643 2644 assert(_markStack.isEmpty(), "Marking should have completed"); 2645 2646 // Unload Klasses, String, Symbols, Code Cache, etc. 2647 { 2648 G1RemarkGCTraceTime trace("Unloading", G1Log::finer()); 2649 2650 if (ClassUnloadingWithConcurrentMark) { 2651 bool purged_classes; 2652 2653 { 2654 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2655 purged_classes = SystemDictionary::do_unloading(&g1_is_alive); 2656 } 2657 2658 { 2659 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest()); 2660 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2661 } 2662 } 2663 2664 if (G1StringDedup::is_enabled()) { 2665 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2666 G1StringDedup::unlink(&g1_is_alive); 2667 } 2668 } 2669 } 2670 2671 void ConcurrentMark::swapMarkBitMaps() { 2672 CMBitMapRO* temp = _prevMarkBitMap; 2673 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2674 _nextMarkBitMap = (CMBitMap*) temp; 2675 } 2676 2677 class CMObjectClosure; 2678 2679 // Closure for iterating over objects, currently only used for 2680 // processing SATB buffers. 
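// (SATB buffers record the pre-write values of overwritten reference
// fields; replaying them through this closure preserves the
// snapshot-at-the-beginning invariant for objects mutated while marking
// was in progress.)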
2681 class CMObjectClosure : public ObjectClosure { 2682 private: 2683 CMTask* _task; 2684 2685 public: 2686 void do_object(oop obj) { 2687 _task->deal_with_reference(obj); 2688 } 2689 2690 CMObjectClosure(CMTask* task) : _task(task) { } 2691 }; 2692 2693 class G1RemarkThreadsClosure : public ThreadClosure { 2694 CMObjectClosure _cm_obj; 2695 G1CMOopClosure _cm_cl; 2696 MarkingCodeBlobClosure _code_cl; 2697 int _thread_parity; 2698 bool _is_par; 2699 2700 public: 2701 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) : 2702 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 2703 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {} 2704 2705 void do_thread(Thread* thread) { 2706 if (thread->is_Java_thread()) { 2707 if (thread->claim_oops_do(_is_par, _thread_parity)) { 2708 JavaThread* jt = (JavaThread*)thread; 2709 2710 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 2711 // however, oops reachable from nmethods have very complex lifecycles: 2712 // * Alive if on the stack of an executing method 2713 // * Weakly reachable otherwise 2714 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 2715 // live by the SATB invariant, but other oops recorded in nmethods may behave differently. 2716 jt->nmethods_do(&_code_cl); 2717 2718 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj); 2719 } 2720 } else if (thread->is_VM_thread()) { 2721 if (thread->claim_oops_do(_is_par, _thread_parity)) { 2722 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj); 2723 } 2724 } 2725 } 2726 }; 2727 2728 class CMRemarkTask: public AbstractGangTask { 2729 private: 2730 ConcurrentMark* _cm; 2731 bool _is_serial; 2732 public: 2733 void work(uint worker_id) { 2734 // Since all available tasks are actually started, we should 2735 // only proceed if we're supposed to be active. 2736 if (worker_id < _cm->active_tasks()) { 2737 CMTask* task = _cm->task(worker_id); 2738 task->record_start_time(); 2739 { 2740 ResourceMark rm; 2741 HandleMark hm; 2742 2743 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial); 2744 Threads::threads_do(&threads_f); 2745 } 2746 2747 do { 2748 task->do_marking_step(1000000000.0 /* something very large */, 2749 true /* do_termination */, 2750 _is_serial); 2751 } while (task->has_aborted() && !_cm->has_overflown()); 2752 // If we overflow, then we do not want to restart. We instead 2753 // want to abort remark and do concurrent marking again.
2754 task->record_end_time(); 2755 } 2756 } 2757 2758 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) : 2759 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) { 2760 _cm->terminator()->reset_for_reuse(active_workers); 2761 } 2762 }; 2763 2764 void ConcurrentMark::checkpointRootsFinalWork() { 2765 ResourceMark rm; 2766 HandleMark hm; 2767 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2768 2769 G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer()); 2770 2771 g1h->ensure_parsability(false); 2772 2773 if (G1CollectedHeap::use_parallel_gc_threads()) { 2774 G1CollectedHeap::StrongRootsScope srs(g1h); 2775 // this is remark, so we'll use up all active threads 2776 uint active_workers = g1h->workers()->active_workers(); 2777 if (active_workers == 0) { 2778 assert(active_workers > 0, "Should have been set earlier"); 2779 active_workers = (uint) ParallelGCThreads; 2780 g1h->workers()->set_active_workers(active_workers); 2781 } 2782 set_concurrency_and_phase(active_workers, false /* concurrent */); 2783 // Leave _parallel_marking_threads at its 2784 // value originally calculated in the ConcurrentMark 2785 // constructor and pass values of the active workers 2786 // through the gang in the task. 2787 2788 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */); 2789 // We will start all available threads, even if we decide that the 2790 // active_workers will be fewer. The extra ones will just bail out 2791 // immediately. 2792 g1h->set_par_threads(active_workers); 2793 g1h->workers()->run_task(&remarkTask); 2794 g1h->set_par_threads(0); 2795 } else { 2796 G1CollectedHeap::StrongRootsScope srs(g1h); 2797 uint active_workers = 1; 2798 set_concurrency_and_phase(active_workers, false /* concurrent */); 2799 2800 // Note - if there's no work gang then the VMThread will be 2801 // the thread to execute the remark - serially. We have 2802 // to pass true for the is_serial parameter so that 2803 // CMTask::do_marking_step() doesn't enter the sync 2804 // barriers in the event of an overflow. Doing so will 2805 // cause an assert that the current thread is not a 2806 // concurrent GC thread.
2807 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2808 remarkTask.work(0); 2809 } 2810 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2811 guarantee(has_overflown() || 2812 satb_mq_set.completed_buffers_num() == 0, 2813 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2814 BOOL_TO_STR(has_overflown()), 2815 satb_mq_set.completed_buffers_num())); 2816 2817 print_stats(); 2818 } 2819 2820 #ifndef PRODUCT 2821 2822 class PrintReachableOopClosure: public OopClosure { 2823 private: 2824 G1CollectedHeap* _g1h; 2825 outputStream* _out; 2826 VerifyOption _vo; 2827 bool _all; 2828 2829 public: 2830 PrintReachableOopClosure(outputStream* out, 2831 VerifyOption vo, 2832 bool all) : 2833 _g1h(G1CollectedHeap::heap()), 2834 _out(out), _vo(vo), _all(all) { } 2835 2836 void do_oop(narrowOop* p) { do_oop_work(p); } 2837 void do_oop( oop* p) { do_oop_work(p); } 2838 2839 template <class T> void do_oop_work(T* p) { 2840 oop obj = oopDesc::load_decode_heap_oop(p); 2841 const char* str = NULL; 2842 const char* str2 = ""; 2843 2844 if (obj == NULL) { 2845 str = ""; 2846 } else if (!_g1h->is_in_g1_reserved(obj)) { 2847 str = " O"; 2848 } else { 2849 HeapRegion* hr = _g1h->heap_region_containing(obj); 2850 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2851 bool marked = _g1h->is_marked(obj, _vo); 2852 2853 if (over_tams) { 2854 str = " >"; 2855 if (marked) { 2856 str2 = " AND MARKED"; 2857 } 2858 } else if (marked) { 2859 str = " M"; 2860 } else { 2861 str = " NOT"; 2862 } 2863 } 2864 2865 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2866 p2i(p), p2i((void*) obj), str, str2); 2867 } 2868 }; 2869 2870 class PrintReachableObjectClosure : public ObjectClosure { 2871 private: 2872 G1CollectedHeap* _g1h; 2873 outputStream* _out; 2874 VerifyOption _vo; 2875 bool _all; 2876 HeapRegion* _hr; 2877 2878 public: 2879 PrintReachableObjectClosure(outputStream* out, 2880 VerifyOption vo, 2881 bool all, 2882 HeapRegion* hr) : 2883 _g1h(G1CollectedHeap::heap()), 2884 _out(out), _vo(vo), _all(all), _hr(hr) { } 2885 2886 void do_object(oop o) { 2887 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2888 bool marked = _g1h->is_marked(o, _vo); 2889 bool print_it = _all || over_tams || marked; 2890 2891 if (print_it) { 2892 _out->print_cr(" "PTR_FORMAT"%s", 2893 p2i((void *)o), (over_tams) ? " >" : (marked) ? 
" M" : ""); 2894 PrintReachableOopClosure oopCl(_out, _vo, _all); 2895 o->oop_iterate_no_header(&oopCl); 2896 } 2897 } 2898 }; 2899 2900 class PrintReachableRegionClosure : public HeapRegionClosure { 2901 private: 2902 G1CollectedHeap* _g1h; 2903 outputStream* _out; 2904 VerifyOption _vo; 2905 bool _all; 2906 2907 public: 2908 bool doHeapRegion(HeapRegion* hr) { 2909 HeapWord* b = hr->bottom(); 2910 HeapWord* e = hr->end(); 2911 HeapWord* t = hr->top(); 2912 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2913 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2914 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p)); 2915 _out->cr(); 2916 2917 HeapWord* from = b; 2918 HeapWord* to = t; 2919 2920 if (to > from) { 2921 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to)); 2922 _out->cr(); 2923 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2924 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2925 _out->cr(); 2926 } 2927 2928 return false; 2929 } 2930 2931 PrintReachableRegionClosure(outputStream* out, 2932 VerifyOption vo, 2933 bool all) : 2934 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2935 }; 2936 2937 void ConcurrentMark::print_reachable(const char* str, 2938 VerifyOption vo, 2939 bool all) { 2940 gclog_or_tty->cr(); 2941 gclog_or_tty->print_cr("== Doing heap dump... "); 2942 2943 if (G1PrintReachableBaseFile == NULL) { 2944 gclog_or_tty->print_cr(" #### error: no base file defined"); 2945 return; 2946 } 2947 2948 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2949 (JVM_MAXPATHLEN - 1)) { 2950 gclog_or_tty->print_cr(" #### error: file name too long"); 2951 return; 2952 } 2953 2954 char file_name[JVM_MAXPATHLEN]; 2955 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2956 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2957 2958 fileStream fout(file_name); 2959 if (!fout.is_open()) { 2960 gclog_or_tty->print_cr(" #### error: could not open file"); 2961 return; 2962 } 2963 2964 outputStream* out = &fout; 2965 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2966 out->cr(); 2967 2968 out->print_cr("--- ITERATING OVER REGIONS"); 2969 out->cr(); 2970 PrintReachableRegionClosure rcl(out, vo, all); 2971 _g1h->heap_region_iterate(&rcl); 2972 out->cr(); 2973 2974 gclog_or_tty->print_cr(" done"); 2975 gclog_or_tty->flush(); 2976 } 2977 2978 #endif // PRODUCT 2979 2980 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2981 // Note we are overriding the read-only view of the prev map here, via 2982 // the cast. 2983 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2984 } 2985 2986 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2987 _nextMarkBitMap->clearRange(mr); 2988 } 2989 2990 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2991 clearRangePrevBitmap(mr); 2992 clearRangeNextBitmap(mr); 2993 } 2994 2995 HeapRegion* 2996 ConcurrentMark::claim_region(uint worker_id) { 2997 // "checkpoint" the finger 2998 HeapWord* finger = _finger; 2999 3000 // _heap_end will not change underneath our feet; it only changes at 3001 // yield points. 3002 while (finger < _heap_end) { 3003 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 3004 3005 // Note on how this code handles humongous regions. In the 3006 // normal case the finger will reach the start of a "starts 3007 // humongous" (SH) region. 
Its end will either be the end of the 3008 // last "continues humongous" (CH) region in the sequence, or the 3009 // standard end of the SH region (if the SH is the only region in 3010 // the sequence). That way claim_region() will skip over the CH 3011 // regions. However, there is a subtle race between a CM thread 3012 // executing this method and a mutator thread doing a humongous 3013 // object allocation. The two are not mutually exclusive as the CM 3014 // thread does not need to hold the Heap_lock when it gets 3015 // here. So there is a chance that claim_region() will come across 3016 // a free region that's in the process of becoming a SH or a CH 3017 // region. In the former case, it will either 3018 // a) Miss the update to the region's end, in which case it will 3019 // visit every subsequent CH region, will find their bitmaps 3020 // empty, and do nothing, or 3021 // b) Will observe the update of the region's end (in which case 3022 // it will skip the subsequent CH regions). 3023 // If it comes across a region that suddenly becomes CH, the 3024 // scenario will be similar to b). So, the race between 3025 // claim_region() and a humongous object allocation might force us 3026 // to do a bit of unnecessary work (due to some unnecessary bitmap 3027 // iterations) but it should not introduce any correctness issues. 3028 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 3029 3030 // Above heap_region_containing_raw may return NULL as we always scan and claim 3031 // until the end of the heap. In this case, just jump to the next region. 3032 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 3033 3034 // Is the gap between reading the finger and doing the CAS too long? 3035 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 3036 if (res == finger && curr_region != NULL) { 3037 // we succeeded 3038 HeapWord* bottom = curr_region->bottom(); 3039 HeapWord* limit = curr_region->next_top_at_mark_start(); 3040 3041 if (verbose_low()) { 3042 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" " 3043 "["PTR_FORMAT", "PTR_FORMAT"), " 3044 "limit = "PTR_FORMAT, 3045 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit)); 3046 } 3047 3048 // notice that _finger == end cannot be guaranteed here since 3049 // someone else might have moved the finger even further 3050 assert(_finger >= end, "the finger should have moved forward"); 3051 3052 if (verbose_low()) { 3053 gclog_or_tty->print_cr("[%u] we were successful with region = " 3054 PTR_FORMAT, worker_id, p2i(curr_region)); 3055 } 3056 3057 if (limit > bottom) { 3058 if (verbose_low()) { 3059 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, " 3060 "returning it ", worker_id, p2i(curr_region)); 3061 } 3062 return curr_region; 3063 } else { 3064 assert(limit == bottom, 3065 "the region limit should be at bottom"); 3066 if (verbose_low()) { 3067 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, " 3068 "returning NULL", worker_id, p2i(curr_region)); 3069 } 3070 // we return NULL and the caller should try calling 3071 // claim_region() again.
3072 return NULL; 3073 } 3074 } else { 3075 assert(_finger > finger, "the finger should have moved forward"); 3076 if (verbose_low()) { 3077 if (curr_region == NULL) { 3078 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, " 3079 "global finger = "PTR_FORMAT", " 3080 "our finger = "PTR_FORMAT, 3081 worker_id, p2i(_finger), p2i(finger)); 3082 } else { 3083 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 3084 "global finger = "PTR_FORMAT", " 3085 "our finger = "PTR_FORMAT, 3086 worker_id, p2i(_finger), p2i(finger)); 3087 } 3088 } 3089 3090 // read it again 3091 finger = _finger; 3092 } 3093 } 3094 3095 return NULL; 3096 } 3097 3098 #ifndef PRODUCT 3099 enum VerifyNoCSetOopsPhase { 3100 VerifyNoCSetOopsStack, 3101 VerifyNoCSetOopsQueues, 3102 VerifyNoCSetOopsSATBCompleted, 3103 VerifyNoCSetOopsSATBThread 3104 }; 3105 3106 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 3107 private: 3108 G1CollectedHeap* _g1h; 3109 VerifyNoCSetOopsPhase _phase; 3110 int _info; 3111 3112 const char* phase_str() { 3113 switch (_phase) { 3114 case VerifyNoCSetOopsStack: return "Stack"; 3115 case VerifyNoCSetOopsQueues: return "Queue"; 3116 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 3117 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 3118 default: ShouldNotReachHere(); 3119 } 3120 return NULL; 3121 } 3122 3123 void do_object_work(oop obj) { 3124 guarantee(!_g1h->obj_in_cs(obj), 3125 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 3126 p2i((void*) obj), phase_str(), _info)); 3127 } 3128 3129 public: 3130 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 3131 3132 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 3133 _phase = phase; 3134 _info = info; 3135 } 3136 3137 virtual void do_oop(oop* p) { 3138 oop obj = oopDesc::load_decode_heap_oop(p); 3139 do_object_work(obj); 3140 } 3141 3142 virtual void do_oop(narrowOop* p) { 3143 // We should not come across narrow oops while scanning marking 3144 // stacks and SATB buffers. 
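    // (Entries on the mark stacks and SATB buffers are full-width oops;
    // compressed narrowOops only occur embedded in heap object fields.)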
3145 ShouldNotReachHere(); 3146 } 3147 3148 virtual void do_object(oop obj) { 3149 do_object_work(obj); 3150 } 3151 }; 3152 3153 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 3154 bool verify_enqueued_buffers, 3155 bool verify_thread_buffers, 3156 bool verify_fingers) { 3157 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 3158 if (!G1CollectedHeap::heap()->mark_in_progress()) { 3159 return; 3160 } 3161 3162 VerifyNoCSetOopsClosure cl; 3163 3164 if (verify_stacks) { 3165 // Verify entries on the global mark stack 3166 cl.set_phase(VerifyNoCSetOopsStack); 3167 _markStack.oops_do(&cl); 3168 3169 // Verify entries on the task queues 3170 for (uint i = 0; i < _max_worker_id; i += 1) { 3171 cl.set_phase(VerifyNoCSetOopsQueues, i); 3172 CMTaskQueue* queue = _task_queues->queue(i); 3173 queue->oops_do(&cl); 3174 } 3175 } 3176 3177 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 3178 3179 // Verify entries on the enqueued SATB buffers 3180 if (verify_enqueued_buffers) { 3181 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 3182 satb_qs.iterate_completed_buffers_read_only(&cl); 3183 } 3184 3185 // Verify entries on the per-thread SATB buffers 3186 if (verify_thread_buffers) { 3187 cl.set_phase(VerifyNoCSetOopsSATBThread); 3188 satb_qs.iterate_thread_buffers_read_only(&cl); 3189 } 3190 3191 if (verify_fingers) { 3192 // Verify the global finger 3193 HeapWord* global_finger = finger(); 3194 if (global_finger != NULL && global_finger < _heap_end) { 3195 // The global finger always points to a heap region boundary. We 3196 // use heap_region_containing_raw() to get the containing region 3197 // given that the global finger could be pointing to a free region 3198 // which subsequently becomes continues humongous. If that 3199 // happens, heap_region_containing() will return the bottom of the 3200 // corresponding starts humongous region and the check below will 3201 // not hold any more. 3202 // Since we always iterate over all regions, we might get a NULL HeapRegion 3203 // here. 3204 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3205 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 3206 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3207 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 3208 } 3209 3210 // Verify the task fingers 3211 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3212 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3213 CMTask* task = _tasks[i]; 3214 HeapWord* task_finger = task->finger(); 3215 if (task_finger != NULL && task_finger < _heap_end) { 3216 // See above note on the global finger verification. 3217 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3218 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 3219 !task_hr->in_collection_set(), 3220 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3221 p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); 3222 } 3223 } 3224 } 3225 } 3226 #endif // PRODUCT 3227 3228 // Aggregate the counting data that was constructed concurrently 3229 // with marking. 
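// In outline, for each region R and each worker w, the closure below
// does (pseudo-code):
//   R.marked_bytes += marked_bytes[w][R.index];
//   global_card_bm |= task_card_bm[w];  // restricted to R's card range
// folding each worker's private "stripe" of the counting data into the
// global totals.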
3230 class AggregateCountDataHRClosure: public HeapRegionClosure { 3231 G1CollectedHeap* _g1h; 3232 ConcurrentMark* _cm; 3233 CardTableModRefBS* _ct_bs; 3234 BitMap* _cm_card_bm; 3235 uint _max_worker_id; 3236 3237 public: 3238 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3239 BitMap* cm_card_bm, 3240 uint max_worker_id) : 3241 _g1h(g1h), _cm(g1h->concurrent_mark()), 3242 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3243 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3244 3245 bool doHeapRegion(HeapRegion* hr) { 3246 if (hr->continuesHumongous()) { 3247 // We will ignore these here and process them when their 3248 // associated "starts humongous" region is processed. 3249 // Note that we cannot rely on their associated 3250 // "starts humongous" region to have their bit set to 1 3251 // since, due to the region chunking in the parallel region 3252 // iteration, a "continues humongous" region might be visited 3253 // before its associated "starts humongous". 3254 return false; 3255 } 3256 3257 HeapWord* start = hr->bottom(); 3258 HeapWord* limit = hr->next_top_at_mark_start(); 3259 HeapWord* end = hr->end(); 3260 3261 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3262 err_msg("Preconditions not met - " 3263 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3264 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3265 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()))); 3266 3267 assert(hr->next_marked_bytes() == 0, "Precondition"); 3268 3269 if (start == limit) { 3270 // NTAMS of this region has not been set so nothing to do. 3271 return false; 3272 } 3273 3274 // 'start' should be in the heap. 3275 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 3276 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 3277 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 3278 3279 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 3280 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 3281 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 3282 3283 // If ntams is not card aligned then we bump the card bitmap index 3284 // for limit so that we get all the cards spanned by 3285 // the object ending at ntams. 3286 // Note: if this is the last region in the heap then ntams 3287 // could be actually just beyond the end of the heap; 3288 // limit_idx will then correspond to a (non-existent) card 3289 // that is also outside the heap. 3290 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 3291 limit_idx += 1; 3292 } 3293 3294 assert(limit_idx <= end_idx, "or else use atomics"); 3295 3296 // Aggregate the "stripe" in the count data associated with hr. 3297 uint hrs_index = hr->hrs_index(); 3298 size_t marked_bytes = 0; 3299 3300 for (uint i = 0; i < _max_worker_id; i += 1) { 3301 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 3302 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 3303 3304 // Fetch the marked_bytes in this region for task i and 3305 // add it to the running total for this region. 3306 marked_bytes += marked_bytes_array[hrs_index]; 3307 3308 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 3309 // into the global card bitmap.
3310 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3311 3312 while (scan_idx < limit_idx) { 3313 assert(task_card_bm->at(scan_idx) == true, "should be"); 3314 _cm_card_bm->set_bit(scan_idx); 3315 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3316 3317 // BitMap::get_next_one_offset() can handle the case when 3318 // its left_offset parameter is greater than its right_offset 3319 // parameter. It does, however, have an early exit if 3320 // left_offset == right_offset. So let's limit the value 3321 // passed in for left offset here. 3322 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 3323 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 3324 } 3325 } 3326 3327 // Update the marked bytes for this region. 3328 hr->add_to_marked_bytes(marked_bytes); 3329 3330 // Next heap region 3331 return false; 3332 } 3333 }; 3334 3335 class G1AggregateCountDataTask: public AbstractGangTask { 3336 protected: 3337 G1CollectedHeap* _g1h; 3338 ConcurrentMark* _cm; 3339 BitMap* _cm_card_bm; 3340 uint _max_worker_id; 3341 int _active_workers; 3342 3343 public: 3344 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3345 ConcurrentMark* cm, 3346 BitMap* cm_card_bm, 3347 uint max_worker_id, 3348 int n_workers) : 3349 AbstractGangTask("Count Aggregation"), 3350 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3351 _max_worker_id(max_worker_id), 3352 _active_workers(n_workers) { } 3353 3354 void work(uint worker_id) { 3355 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3356 3357 if (G1CollectedHeap::use_parallel_gc_threads()) { 3358 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3359 _active_workers, 3360 HeapRegion::AggregateCountClaimValue); 3361 } else { 3362 _g1h->heap_region_iterate(&cl); 3363 } 3364 } 3365 }; 3366 3367 3368 void ConcurrentMark::aggregate_count_data() { 3369 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3370 _g1h->workers()->active_workers() : 3371 1); 3372 3373 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3374 _max_worker_id, n_workers); 3375 3376 if (G1CollectedHeap::use_parallel_gc_threads()) { 3377 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3378 "sanity check"); 3379 _g1h->set_par_threads(n_workers); 3380 _g1h->workers()->run_task(&g1_par_agg_task); 3381 _g1h->set_par_threads(0); 3382 3383 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3384 "sanity check"); 3385 _g1h->reset_heap_region_claim_values(); 3386 } else { 3387 g1_par_agg_task.work(0); 3388 } 3389 } 3390 3391 // Clear the per-worker arrays used to store the per-region counting data 3392 void ConcurrentMark::clear_all_count_data() { 3393 // Clear the global card bitmap - it will be filled during 3394 // liveness count aggregation (during remark) and the 3395 // final counting task. 3396 _card_bm.clear(); 3397 3398 // Clear the global region bitmap - it will be filled as part 3399 // of the final counting task. 
3400 _region_bm.clear(); 3401 3402 uint max_regions = _g1h->max_regions(); 3403 assert(_max_worker_id > 0, "uninitialized"); 3404 3405 for (uint i = 0; i < _max_worker_id; i += 1) { 3406 BitMap* task_card_bm = count_card_bitmap_for(i); 3407 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3408 3409 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3410 assert(marked_bytes_array != NULL, "uninitialized"); 3411 3412 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3413 task_card_bm->clear(); 3414 } 3415 } 3416 3417 void ConcurrentMark::print_stats() { 3418 if (verbose_stats()) { 3419 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3420 for (size_t i = 0; i < _active_tasks; ++i) { 3421 _tasks[i]->print_stats(); 3422 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3423 } 3424 } 3425 } 3426 3427 // abandon current marking iteration due to a Full GC 3428 void ConcurrentMark::abort() { 3429 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 3430 // concurrent bitmap clearing. 3431 _nextMarkBitMap->clearAll(); 3432 3433 // Note we cannot clear the previous marking bitmap here 3434 // since VerifyDuringGC verifies the objects marked during 3435 // a full GC against the previous bitmap. 3436 3437 // Clear the liveness counting data 3438 clear_all_count_data(); 3439 // Empty mark stack 3440 reset_marking_state(); 3441 for (uint i = 0; i < _max_worker_id; ++i) { 3442 _tasks[i]->clear_region_fields(); 3443 } 3444 _first_overflow_barrier_sync.abort(); 3445 _second_overflow_barrier_sync.abort(); 3446 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id(); 3447 if (!gc_id.is_undefined()) { 3448 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance 3449 // to detect that it was aborted. Only keep track of the first GC id that we aborted. 3450 _aborted_gc_id = gc_id; 3451 } 3452 _has_aborted = true; 3453 3454 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3455 satb_mq_set.abandon_partial_marking(); 3456 // This can be called either during or outside marking, we'll read 3457 // the expected_active value from the SATB queue set. 3458 satb_mq_set.set_active_all_threads( 3459 false, /* new active value */ 3460 satb_mq_set.is_active() /* expected_active */); 3461 3462 _g1h->trace_heap_after_concurrent_cycle(); 3463 _g1h->register_concurrent_cycle_end(); 3464 } 3465 3466 const GCId& ConcurrentMark::concurrent_gc_id() { 3467 if (has_aborted()) { 3468 return _aborted_gc_id; 3469 } 3470 return _g1h->gc_tracer_cm()->gc_id(); 3471 } 3472 3473 static void print_ms_time_info(const char* prefix, const char* name, 3474 NumberSeq& ns) { 3475 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3476 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3477 if (ns.num() > 0) { 3478 gclog_or_tty->print_cr("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 3479 prefix, ns.sd(), ns.maximum()); 3480 } 3481 } 3482 3483 void ConcurrentMark::print_summary_info() { 3484 gclog_or_tty->print_cr(" Concurrent marking:"); 3485 print_ms_time_info(" ", "init marks", _init_times); 3486 print_ms_time_info(" ", "remarks", _remark_times); 3487 { 3488 print_ms_time_info(" ", "final marks", _remark_mark_times); 3489 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3490 3491 } 3492 print_ms_time_info(" ", "cleanups", _cleanup_times); 3493 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3494 _total_counting_time, 3495 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3496 (double)_cleanup_times.num() 3497 : 0.0)); 3498 if (G1ScrubRemSets) { 3499 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3500 _total_rs_scrub_time, 3501 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3502 (double)_cleanup_times.num() 3503 : 0.0)); 3504 } 3505 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3506 (_init_times.sum() + _remark_times.sum() + 3507 _cleanup_times.sum())/1000.0); 3508 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3509 "(%8.2f s marking).", 3510 cmThread()->vtime_accum(), 3511 cmThread()->vtime_mark_accum()); 3512 } 3513 3514 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3515 if (use_parallel_marking_threads()) { 3516 _parallel_workers->print_worker_threads_on(st); 3517 } 3518 } 3519 3520 void ConcurrentMark::print_on_error(outputStream* st) const { 3521 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3522 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3523 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3524 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3525 } 3526 3527 // We take a break if someone is trying to stop the world. 
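// Returns true if we actually yielded (so a safepoint, and possibly a
// pause, may have occurred before we return), false otherwise.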
3528 bool ConcurrentMark::do_yield_check(uint worker_id) { 3529 if (SuspendibleThreadSet::should_yield()) { 3530 if (worker_id == 0) { 3531 _g1h->g1_policy()->record_concurrent_pause(); 3532 } 3533 SuspendibleThreadSet::yield(); 3534 return true; 3535 } else { 3536 return false; 3537 } 3538 } 3539 3540 bool ConcurrentMark::containing_card_is_marked(void* p) { 3541 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3542 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3543 } 3544 3545 bool ConcurrentMark::containing_cards_are_marked(void* start, 3546 void* last) { 3547 return containing_card_is_marked(start) && 3548 containing_card_is_marked(last); 3549 } 3550 3551 #ifndef PRODUCT 3552 // for debugging purposes 3553 void ConcurrentMark::print_finger() { 3554 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3555 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3556 for (uint i = 0; i < _max_worker_id; ++i) { 3557 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3558 } 3559 gclog_or_tty->cr(); 3560 } 3561 #endif 3562 3563 void CMTask::scan_object(oop obj) { 3564 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3565 3566 if (_cm->verbose_high()) { 3567 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3568 _worker_id, p2i((void*) obj)); 3569 } 3570 3571 size_t obj_size = obj->size(); 3572 _words_scanned += obj_size; 3573 3574 obj->oop_iterate(_cm_oop_closure); 3575 statsOnly( ++_objs_scanned ); 3576 check_limits(); 3577 } 3578 3579 // Closure for iteration over bitmaps 3580 class CMBitMapClosure : public BitMapClosure { 3581 private: 3582 // the bitmap that is being iterated over 3583 CMBitMap* _nextMarkBitMap; 3584 ConcurrentMark* _cm; 3585 CMTask* _task; 3586 3587 public: 3588 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3589 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3590 3591 bool do_bit(size_t offset) { 3592 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3593 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3594 assert( addr < _cm->finger(), "invariant"); 3595 3596 statsOnly( _task->increase_objs_found_on_bitmap() ); 3597 assert(addr >= _task->finger(), "invariant"); 3598 3599 // We move that task's local finger along. 
3600 _task->move_finger_to(addr);
3601
3602 _task->scan_object(oop(addr));
3603 // we only partially drain the local queue and global stack
3604 _task->drain_local_queue(true);
3605 _task->drain_global_stack(true);
3606
3607 // if the has_aborted flag has been raised, we need to bail out of
3608 // the iteration
3609 return !_task->has_aborted();
3610 }
3611 };
3612
3613 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3614 ConcurrentMark* cm,
3615 CMTask* task)
3616 : _g1h(g1h), _cm(cm), _task(task) {
3617 assert(_ref_processor == NULL, "should be initialized to NULL");
3618
3619 if (G1UseConcMarkReferenceProcessing) {
3620 _ref_processor = g1h->ref_processor_cm();
3621 assert(_ref_processor != NULL, "should not be NULL");
3622 }
3623 }
3624
3625 void CMTask::setup_for_region(HeapRegion* hr) {
3626 assert(hr != NULL,
3627 "claim_region() should have filtered out NULL regions");
3628 assert(!hr->continuesHumongous(),
3629 "claim_region() should have filtered out continues humongous regions");
3630
3631 if (_cm->verbose_low()) {
3632 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3633 _worker_id, p2i(hr));
3634 }
3635
3636 _curr_region = hr;
3637 _finger = hr->bottom();
3638 update_region_limit();
3639 }
3640
3641 void CMTask::update_region_limit() {
3642 HeapRegion* hr = _curr_region;
3643 HeapWord* bottom = hr->bottom();
3644 HeapWord* limit = hr->next_top_at_mark_start();
3645
3646 if (limit == bottom) {
3647 if (_cm->verbose_low()) {
3648 gclog_or_tty->print_cr("[%u] found an empty region "
3649 "["PTR_FORMAT", "PTR_FORMAT")",
3650 _worker_id, p2i(bottom), p2i(limit));
3651 }
3652 // The region was collected underneath our feet.
3653 // We set the finger to bottom to ensure that the bitmap
3654 // iteration that will follow this will not do anything.
3655 // (this is not a condition that holds when we set the region up,
3656 // as the region is not supposed to be empty in the first place)
3657 _finger = bottom;
3658 } else if (limit >= _region_limit) {
3659 assert(limit >= _finger, "peace of mind");
3660 } else {
3661 assert(limit < _region_limit, "only way to get here");
3662 // This can happen under some pretty unusual circumstances. An
3663 // evacuation pause empties the region underneath our feet (NTAMS
3664 // at bottom). We then do some allocation in the region (NTAMS
3665 // stays at bottom), followed by the region being used as a GC
3666 // alloc region (NTAMS will move to top() and the objects
3667 // originally below it will be grayed). All objects now marked in
3668 // the region are explicitly grayed, if below the global finger,
3669 // and so in fact we do not need to scan anything else. So, we simply
3670 // set _finger to be limit to ensure that the bitmap iteration
3671 // doesn't do anything.
3672 _finger = limit;
3673 }
3674
3675 _region_limit = limit;
3676 }
3677
3678 void CMTask::giveup_current_region() {
3679 assert(_curr_region != NULL, "invariant");
3680 if (_cm->verbose_low()) {
3681 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3682 _worker_id, p2i(_curr_region));
3683 }
3684 clear_region_fields();
3685 }
3686
3687 void CMTask::clear_region_fields() {
3688 // Values for these three fields that indicate that we're not
3689 // holding on to a region.
3690 _curr_region = NULL; 3691 _finger = NULL; 3692 _region_limit = NULL; 3693 } 3694 3695 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3696 if (cm_oop_closure == NULL) { 3697 assert(_cm_oop_closure != NULL, "invariant"); 3698 } else { 3699 assert(_cm_oop_closure == NULL, "invariant"); 3700 } 3701 _cm_oop_closure = cm_oop_closure; 3702 } 3703 3704 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3705 guarantee(nextMarkBitMap != NULL, "invariant"); 3706 3707 if (_cm->verbose_low()) { 3708 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3709 } 3710 3711 _nextMarkBitMap = nextMarkBitMap; 3712 clear_region_fields(); 3713 3714 _calls = 0; 3715 _elapsed_time_ms = 0.0; 3716 _termination_time_ms = 0.0; 3717 _termination_start_time_ms = 0.0; 3718 3719 #if _MARKING_STATS_ 3720 _local_pushes = 0; 3721 _local_pops = 0; 3722 _local_max_size = 0; 3723 _objs_scanned = 0; 3724 _global_pushes = 0; 3725 _global_pops = 0; 3726 _global_max_size = 0; 3727 _global_transfers_to = 0; 3728 _global_transfers_from = 0; 3729 _regions_claimed = 0; 3730 _objs_found_on_bitmap = 0; 3731 _satb_buffers_processed = 0; 3732 _steal_attempts = 0; 3733 _steals = 0; 3734 _aborted = 0; 3735 _aborted_overflow = 0; 3736 _aborted_cm_aborted = 0; 3737 _aborted_yield = 0; 3738 _aborted_timed_out = 0; 3739 _aborted_satb = 0; 3740 _aborted_termination = 0; 3741 #endif // _MARKING_STATS_ 3742 } 3743 3744 bool CMTask::should_exit_termination() { 3745 regular_clock_call(); 3746 // This is called when we are in the termination protocol. We should 3747 // quit if, for some reason, this task wants to abort or the global 3748 // stack is not empty (this means that we can get work from it). 3749 return !_cm->mark_stack_empty() || has_aborted(); 3750 } 3751 3752 void CMTask::reached_limit() { 3753 assert(_words_scanned >= _words_scanned_limit || 3754 _refs_reached >= _refs_reached_limit , 3755 "shouldn't have been called otherwise"); 3756 regular_clock_call(); 3757 } 3758 3759 void CMTask::regular_clock_call() { 3760 if (has_aborted()) return; 3761 3762 // First, we need to recalculate the words scanned and refs reached 3763 // limits for the next clock call. 3764 recalculate_limits(); 3765 3766 // During the regular clock call we do the following 3767 3768 // (1) If an overflow has been flagged, then we abort. 3769 if (_cm->has_overflown()) { 3770 set_has_aborted(); 3771 return; 3772 } 3773 3774 // If we are not concurrent (i.e. we're doing remark) we don't need 3775 // to check anything else. The other steps are only needed during 3776 // the concurrent marking phase. 3777 if (!concurrent()) return; 3778 3779 // (2) If marking has been aborted for Full GC, then we also abort. 3780 if (_cm->has_aborted()) { 3781 set_has_aborted(); 3782 statsOnly( ++_aborted_cm_aborted ); 3783 return; 3784 } 3785 3786 double curr_time_ms = os::elapsedVTime() * 1000.0; 3787 3788 // (3) If marking stats are enabled, then we update the step history. 
3789 #if _MARKING_STATS_
3790 if (_words_scanned >= _words_scanned_limit) {
3791 ++_clock_due_to_scanning;
3792 }
3793 if (_refs_reached >= _refs_reached_limit) {
3794 ++_clock_due_to_marking;
3795 }
3796
3797 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3798 _interval_start_time_ms = curr_time_ms;
3799 _all_clock_intervals_ms.add(last_interval_ms);
3800
3801 if (_cm->verbose_medium()) {
3802 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3803 "scanned = %d%s, refs reached = %d%s",
3804 _worker_id, last_interval_ms,
3805 _words_scanned,
3806 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3807 _refs_reached,
3808 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3809 }
3810 #endif // _MARKING_STATS_
3811
3812 // (4) We check whether we should yield. If we have to, then we abort.
3813 if (SuspendibleThreadSet::should_yield()) {
3814 // We should yield. To do this we abort the task. The caller is
3815 // responsible for yielding.
3816 set_has_aborted();
3817 statsOnly( ++_aborted_yield );
3818 return;
3819 }
3820
3821 // (5) We check whether we've reached our time quota. If we have,
3822 // then we abort.
3823 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3824 if (elapsed_time_ms > _time_target_ms) {
3825 set_has_aborted();
3826 _has_timed_out = true;
3827 statsOnly( ++_aborted_timed_out );
3828 return;
3829 }
3830
3831 // (6) Finally, we check whether there are enough completed SATB
3832 // buffers available for processing. If there are, we abort.
3833 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3834 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3835 if (_cm->verbose_low()) {
3836 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3837 _worker_id);
3838 }
3839 // we do need to process SATB buffers, so we'll abort and restart
3840 // the marking task to do so
3841 set_has_aborted();
3842 statsOnly( ++_aborted_satb );
3843 return;
3844 }
3845 }
3846
3847 void CMTask::recalculate_limits() {
3848 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3849 _words_scanned_limit = _real_words_scanned_limit;
3850
3851 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3852 _refs_reached_limit = _real_refs_reached_limit;
3853 }
3854
3855 void CMTask::decrease_limits() {
3856 // This is called when we believe that we're going to do an infrequent
3857 // operation which will increase the per-byte scanned cost (i.e. move
3858 // entries to/from the global stack). It basically tries to decrease the
3859 // scanning limit so that the clock is called earlier.
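// For example (an illustrative calculation only): recalculate_limits()
// sets _words_scanned_limit to _words_scanned + words_scanned_period, so
// subtracting three quarters of the period below leaves at most about a
// quarter of the normal scanning period before the next clock call.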
3860
3861 if (_cm->verbose_medium()) {
3862 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3863 }
3864
3865 _words_scanned_limit = _real_words_scanned_limit -
3866 3 * words_scanned_period / 4;
3867 _refs_reached_limit = _real_refs_reached_limit -
3868 3 * refs_reached_period / 4;
3869 }
3870
3871 void CMTask::move_entries_to_global_stack() {
3872 // local array where we'll store the entries that will be popped
3873 // from the local queue
3874 oop buffer[global_stack_transfer_size];
3875
3876 int n = 0;
3877 oop obj;
3878 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3879 buffer[n] = obj;
3880 ++n;
3881 }
3882
3883 if (n > 0) {
3884 // we popped at least one entry from the local queue
3885
3886 statsOnly( ++_global_transfers_to; _local_pops += n );
3887
3888 if (!_cm->mark_stack_push(buffer, n)) {
3889 if (_cm->verbose_low()) {
3890 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3891 _worker_id);
3892 }
3893 set_has_aborted();
3894 } else {
3895 // the transfer was successful
3896
3897 if (_cm->verbose_medium()) {
3898 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3899 _worker_id, n);
3900 }
3901 statsOnly( int tmp_size = _cm->mark_stack_size();
3902 if (tmp_size > _global_max_size) {
3903 _global_max_size = tmp_size;
3904 }
3905 _global_pushes += n );
3906 }
3907 }
3908
3909 // this operation was quite expensive, so decrease the limits
3910 decrease_limits();
3911 }
3912
3913 void CMTask::get_entries_from_global_stack() {
3914 // local array where we'll store the entries that will be popped
3915 // from the global stack.
3916 oop buffer[global_stack_transfer_size];
3917 int n;
3918 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3919 assert(n <= global_stack_transfer_size,
3920 "we should not pop more than the given limit");
3921 if (n > 0) {
3922 // yes, we did actually pop at least one entry
3923
3924 statsOnly( ++_global_transfers_from; _global_pops += n );
3925 if (_cm->verbose_medium()) {
3926 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3927 _worker_id, n);
3928 }
3929 for (int i = 0; i < n; ++i) {
3930 bool success = _task_queue->push(buffer[i]);
3931 // We only call this when the local queue is empty or under a
3932 // given target limit. So, we do not expect this push to fail.
3933 assert(success, "invariant");
3934 }
3935
3936 statsOnly( int tmp_size = _task_queue->size();
3937 if (tmp_size > _local_max_size) {
3938 _local_max_size = tmp_size;
3939 }
3940 _local_pushes += n );
3941 }
3942
3943 // this operation was quite expensive, so decrease the limits
3944 decrease_limits();
3945 }
3946
3947 void CMTask::drain_local_queue(bool partially) {
3948 if (has_aborted()) return;
3949
3950 // Decide what the target size is, depending on whether we're going to
3951 // drain it partially (so that other tasks can steal if they run out
3952 // of things to do) or totally (at the very end).
3953 size_t target_size;
3954 if (partially) {
3955 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3956 } else {
3957 target_size = 0;
3958 }
3959
3960 if (_task_queue->size() > target_size) {
3961 if (_cm->verbose_high()) {
3962 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3963 _worker_id, target_size);
3964 }
3965
3966 oop obj;
3967 bool ret = _task_queue->pop_local(obj);
3968 while (ret) {
3969 statsOnly( ++_local_pops );
3970
3971 if (_cm->verbose_high()) {
3972 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3973 p2i((void*) obj));
3974 }
3975
3976 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3977 assert(!_g1h->is_on_master_free_list(
3978 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3979
3980 scan_object(obj);
3981
3982 if (_task_queue->size() <= target_size || has_aborted()) {
3983 ret = false;
3984 } else {
3985 ret = _task_queue->pop_local(obj);
3986 }
3987 }
3988
3989 if (_cm->verbose_high()) {
3990 gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3991 _worker_id, _task_queue->size());
3992 }
3993 }
3994 }
3995
3996 void CMTask::drain_global_stack(bool partially) {
3997 if (has_aborted()) return;
3998
3999 // We have a policy to drain the local queue before we attempt to
4000 // drain the global stack.
4001 assert(partially || _task_queue->size() == 0, "invariant");
4002
4003 // Decide what the target size is, depending on whether we're going to
4004 // drain it partially (so that other tasks can steal if they run out
4005 // of things to do) or totally (at the very end). Notice that,
4006 // because we move entries from the global stack in chunks or
4007 // because another task might be doing the same, we might in fact
4008 // drop below the target. But this is not a problem.
4009 size_t target_size;
4010 if (partially) {
4011 target_size = _cm->partial_mark_stack_size_target();
4012 } else {
4013 target_size = 0;
4014 }
4015
4016 if (_cm->mark_stack_size() > target_size) {
4017 if (_cm->verbose_low()) {
4018 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
4019 _worker_id, target_size);
4020 }
4021
4022 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
4023 get_entries_from_global_stack();
4024 drain_local_queue(partially);
4025 }
4026
4027 if (_cm->verbose_low()) {
4028 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
4029 _worker_id, _cm->mark_stack_size());
4030 }
4031 }
4032 }
4033
4034 // SATB Queue has several assumptions on whether to call the par or
4035 // non-par versions of the methods. This is why some of the code is
4036 // replicated. We should really get rid of the single-threaded version
4037 // of the code to simplify things.
4038 void CMTask::drain_satb_buffers() {
4039 if (has_aborted()) return;
4040
4041 // We set this so that the regular clock knows that we're in the
4042 // middle of draining buffers and doesn't set the abort flag when it
4043 // notices that SATB buffers are available for draining. It'd be
4044 // very counterproductive if it did that.
:-) 4045 _draining_satb_buffers = true; 4046 4047 CMObjectClosure oc(this); 4048 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 4049 if (G1CollectedHeap::use_parallel_gc_threads()) { 4050 satb_mq_set.set_par_closure(_worker_id, &oc); 4051 } else { 4052 satb_mq_set.set_closure(&oc); 4053 } 4054 4055 // This keeps claiming and applying the closure to completed buffers 4056 // until we run out of buffers or we need to abort. 4057 if (G1CollectedHeap::use_parallel_gc_threads()) { 4058 while (!has_aborted() && 4059 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 4060 if (_cm->verbose_medium()) { 4061 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 4062 } 4063 statsOnly( ++_satb_buffers_processed ); 4064 regular_clock_call(); 4065 } 4066 } else { 4067 while (!has_aborted() && 4068 satb_mq_set.apply_closure_to_completed_buffer()) { 4069 if (_cm->verbose_medium()) { 4070 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 4071 } 4072 statsOnly( ++_satb_buffers_processed ); 4073 regular_clock_call(); 4074 } 4075 } 4076 4077 _draining_satb_buffers = false; 4078 4079 assert(has_aborted() || 4080 concurrent() || 4081 satb_mq_set.completed_buffers_num() == 0, "invariant"); 4082 4083 if (G1CollectedHeap::use_parallel_gc_threads()) { 4084 satb_mq_set.set_par_closure(_worker_id, NULL); 4085 } else { 4086 satb_mq_set.set_closure(NULL); 4087 } 4088 4089 // again, this was a potentially expensive operation, decrease the 4090 // limits to get the regular clock call early 4091 decrease_limits(); 4092 } 4093 4094 void CMTask::print_stats() { 4095 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 4096 _worker_id, _calls); 4097 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 4098 _elapsed_time_ms, _termination_time_ms); 4099 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4100 _step_times_ms.num(), _step_times_ms.avg(), 4101 _step_times_ms.sd()); 4102 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4103 _step_times_ms.maximum(), _step_times_ms.sum()); 4104 4105 #if _MARKING_STATS_ 4106 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4107 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 4108 _all_clock_intervals_ms.sd()); 4109 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4110 _all_clock_intervals_ms.maximum(), 4111 _all_clock_intervals_ms.sum()); 4112 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 4113 _clock_due_to_scanning, _clock_due_to_marking); 4114 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 4115 _objs_scanned, _objs_found_on_bitmap); 4116 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 4117 _local_pushes, _local_pops, _local_max_size); 4118 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 4119 _global_pushes, _global_pops, _global_max_size); 4120 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 4121 _global_transfers_to,_global_transfers_from); 4122 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 4123 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 4124 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 4125 _steal_attempts, _steals); 4126 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 4127 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 
4128 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4129 gclog_or_tty->print_cr("           time out: %d, SATB: %d, termination: %d",
4130 _aborted_timed_out, _aborted_satb, _aborted_termination);
4131 #endif // _MARKING_STATS_
4132 }
4133
4134 /*****************************************************************************
4135
4136 The do_marking_step(time_target_ms, ...) method is the building
4137 block of the parallel marking framework. It can be called in parallel
4138 with other invocations of do_marking_step() on different tasks
4139 (but only one per task, obviously) and concurrently with the
4140 mutator threads, or during remark; hence it eliminates the need
4141 for two versions of the code. When called during remark, it will
4142 pick up from where the task left off during the concurrent marking
4143 phase. Interestingly, tasks are also claimable during evacuation
4144 pauses, since do_marking_step() ensures that it aborts before
4145 it needs to yield.
4146
4147 The data structures that it uses to do marking work are the
4148 following:
4149
4150 (1) Marking Bitmap. If there are gray objects that appear only
4151 on the bitmap (this happens either when dealing with an overflow
4152 or when the initial marking phase has simply marked the roots
4153 and didn't push them on the stack), then tasks claim heap
4154 regions whose bitmap they then scan to find gray objects. A
4155 global finger indicates where the end of the last claimed region
4156 is. A local finger indicates how far into the region a task has
4157 scanned. The two fingers are used to determine how to gray an
4158 object (i.e. whether simply marking it is OK, as it will be
4159 visited by a task in the future, or whether it also needs to be
4160 pushed on a stack).
4161
4162 (2) Local Queue. The task's local queue, which it can access
4163 reasonably efficiently. Other tasks can steal from
4164 it when they run out of work. Throughout the marking phase, a
4165 task attempts to keep its local queue short but not totally
4166 empty, so that entries are available for stealing by other
4167 tasks. Only when there is no more work will a task totally
4168 drain its local queue.
4169
4170 (3) Global Mark Stack. This handles local queue overflow. During
4171 marking only sets of entries are moved between it and the local
4172 queues, as access to it requires a mutex and more fine-grained
4173 interaction with it might cause contention. If it
4174 overflows, then the marking phase should restart and iterate
4175 over the bitmap to identify gray objects. Throughout the marking
4176 phase, tasks attempt to keep the global mark stack at a small
4177 length but not totally empty, so that entries are available for
4178 popping by other tasks. Only when there is no more work will tasks
4179 totally drain the global mark stack.
4180
4181 (4) SATB Buffer Queue. This is where completed SATB buffers are
4182 made available. Buffers are regularly removed from this queue
4183 and scanned for roots, so that the queue doesn't get too
4184 long. During remark, all completed buffers are processed, as
4185 well as the filled-in parts of any uncompleted buffers.
4186
4187 The do_marking_step() method tries to abort when the time target
4188 has been reached. There are a few other cases when the
4189 do_marking_step() method also aborts:
4190
4191 (1) When the marking phase has been aborted (after a Full GC).
4192
4193 (2) When a global overflow (on the global stack) has been
4194 triggered.
Before the task aborts, it will actually sync up with
4195 the other tasks to ensure that all the marking data structures
4196 (local queues, stacks, fingers etc.) are re-initialized so that
4197 when do_marking_step() completes, the marking phase can
4198 immediately restart.
4199
4200 (3) When enough completed SATB buffers are available. The
4201 do_marking_step() method only tries to drain SATB buffers right
4202 at the beginning. So, if enough buffers are available, the
4203 marking step aborts and the SATB buffers are processed at
4204 the beginning of the next invocation.
4205
4206 (4) To yield. When we have to yield, we abort and yield
4207 right at the end of do_marking_step(). This saves us a lot
4208 of hassle, as by yielding we might allow a Full GC. If that
4209 happens, objects will be compacted underneath our feet, the
4210 heap might shrink, etc. We avoid having to check for this by just
4211 aborting and doing the yield right at the end.
4212
4213 From the above it follows that the do_marking_step() method should
4214 be called in a loop (or, at least, regularly) until it completes.
4215
4216 If a marking step completes without its has_aborted() flag being
4217 true, it means it has completed the current marking phase (and
4218 also all other marking tasks have done so and have all synced up).
4219
4220 A method called regular_clock_call() is invoked "regularly" (in
4221 sub-ms intervals) throughout marking. It is this clock method that
4222 checks all the abort conditions which were mentioned above and
4223 decides when the task should abort. A work-based scheme is used to
4224 trigger this clock method: when the number of object words the
4225 marking phase has scanned or the number of references the marking
4226 phase has visited reaches a given limit. Additional invocations of
4227 the clock method have been planted in a few other strategic places
4228 too. The initial reason for the clock method was to avoid calling
4229 vtime too regularly, as it is quite expensive. So, once it was in
4230 place, it was natural to piggy-back all the other conditions on it
4231 and not constantly check them throughout the code.
4232
4233 If do_termination is true then do_marking_step will enter its
4234 termination protocol.
4235
4236 The value of is_serial must be true when do_marking_step is being
4237 called serially (i.e. by the VMThread) and do_marking_step should
4238 skip any synchronization in the termination and overflow code.
4239 Examples include the serial remark code and the serial reference
4240 processing closures.
4241
4242 The value of is_serial must be false when do_marking_step is
4243 being called by any of the worker threads in a work gang.
4244 Examples include the concurrent marking code (CMMarkingTask),
4245 the MT remark code, and the MT reference processing closures.
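As a concrete illustration (a minimal sketch, not a verbatim copy of
any caller in this file; the real callers, such as the concurrent
marking task, also do vtime accounting and yield handling around each
step), a parallel caller is expected to drive it roughly like this:

  CMTask* the_task = _cm->task(worker_id);
  do {
    double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
    the_task->do_marking_step(mark_step_duration_ms,
                              true  /* do_termination */,
                              false /* is_serial */);
    // yield to the suspendible thread set here, if requested
  } while (!_cm->has_aborted() && the_task->has_aborted());

That is, the step is re-invoked for as long as this particular task
aborted but marking as a whole has not.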
4246
4247 *****************************************************************************/
4248
4249 void CMTask::do_marking_step(double time_target_ms,
4250 bool do_termination,
4251 bool is_serial) {
4252 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4253 assert(concurrent() == _cm->concurrent(), "they should be the same");
4254
4255 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4256 assert(_task_queues != NULL, "invariant");
4257 assert(_task_queue != NULL, "invariant");
4258 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4259
4260 assert(!_claimed,
4261 "only one thread should claim this task at any one time");
4262
4263 // OK, this doesn't safeguard against all possible scenarios, as it is
4264 // possible for two threads to set the _claimed flag at the same
4265 // time. But it is only for debugging purposes anyway and it will
4266 // catch most problems.
4267 _claimed = true;
4268
4269 _start_time_ms = os::elapsedVTime() * 1000.0;
4270 statsOnly( _interval_start_time_ms = _start_time_ms );
4271
4272 // If do_stealing is true then do_marking_step will attempt to
4273 // steal work from the other CMTasks. It only makes sense to
4274 // enable stealing when the termination protocol is enabled
4275 // and do_marking_step() is not being called serially.
4276 bool do_stealing = do_termination && !is_serial;
4277
4278 double diff_prediction_ms =
4279 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4280 _time_target_ms = time_target_ms - diff_prediction_ms;
4281
4282 // set up the variables that are used in the work-based scheme to
4283 // call the regular clock method
4284 _words_scanned = 0;
4285 _refs_reached = 0;
4286 recalculate_limits();
4287
4288 // clear all flags
4289 clear_has_aborted();
4290 _has_timed_out = false;
4291 _draining_satb_buffers = false;
4292
4293 ++_calls;
4294
4295 if (_cm->verbose_low()) {
4296 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4297 "target = %1.2lfms >>>>>>>>>>",
4298 _worker_id, _calls, _time_target_ms);
4299 }
4300
4301 // Set up the bitmap and oop closures. Anything that uses them is
4302 // eventually called from this method, so it is OK to allocate these
4303 // statically.
4304 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4305 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4306 set_cm_oop_closure(&cm_oop_closure);
4307
4308 if (_cm->has_overflown()) {
4309 // This can happen if the mark stack overflows during a GC pause
4310 // and this task, after a yield point, restarts. We have to abort
4311 // as we need to get into the overflow protocol which happens
4312 // right at the end of this task.
4313 set_has_aborted();
4314 }
4315
4316 // First drain any available SATB buffers. After this, we will not
4317 // look at SATB buffers before the next invocation of this method.
4318 // If enough completed SATB buffers are queued up, the regular clock
4319 // will abort this task so that it restarts.
4320 drain_satb_buffers();
4321 // ...then partially drain the local queue and the global stack
4322 drain_local_queue(true);
4323 drain_global_stack(true);
4324
4325 do {
4326 if (!has_aborted() && _curr_region != NULL) {
4327 // This means that we're already holding on to a region.
4328 assert(_finger != NULL, "if region is not NULL, then the finger "
4329 "should not be NULL either");
4330
4331 // We might have restarted this task after an evacuation pause
4332 // which might have evacuated the region we're holding on to
4333 // underneath our feet.
Let's read its limit again to make sure 4334 // that we do not iterate over a region of the heap that 4335 // contains garbage (update_region_limit() will also move 4336 // _finger to the start of the region if it is found empty). 4337 update_region_limit(); 4338 // We will start from _finger not from the start of the region, 4339 // as we might be restarting this task after aborting half-way 4340 // through scanning this region. In this case, _finger points to 4341 // the address where we last found a marked object. If this is a 4342 // fresh region, _finger points to start(). 4343 MemRegion mr = MemRegion(_finger, _region_limit); 4344 4345 if (_cm->verbose_low()) { 4346 gclog_or_tty->print_cr("[%u] we're scanning part " 4347 "["PTR_FORMAT", "PTR_FORMAT") " 4348 "of region "HR_FORMAT, 4349 _worker_id, p2i(_finger), p2i(_region_limit), 4350 HR_FORMAT_PARAMS(_curr_region)); 4351 } 4352 4353 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4354 "humongous regions should go around loop once only"); 4355 4356 // Some special cases: 4357 // If the memory region is empty, we can just give up the region. 4358 // If the current region is humongous then we only need to check 4359 // the bitmap for the bit associated with the start of the object, 4360 // scan the object if it's live, and give up the region. 4361 // Otherwise, let's iterate over the bitmap of the part of the region 4362 // that is left. 4363 // If the iteration is successful, give up the region. 4364 if (mr.is_empty()) { 4365 giveup_current_region(); 4366 regular_clock_call(); 4367 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4368 if (_nextMarkBitMap->isMarked(mr.start())) { 4369 // The object is marked - apply the closure 4370 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4371 bitmap_closure.do_bit(offset); 4372 } 4373 // Even if this task aborted while scanning the humongous object 4374 // we can (and should) give up the current region. 4375 giveup_current_region(); 4376 regular_clock_call(); 4377 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4378 giveup_current_region(); 4379 regular_clock_call(); 4380 } else { 4381 assert(has_aborted(), "currently the only way to do so"); 4382 // The only way to abort the bitmap iteration is to return 4383 // false from the do_bit() method. However, inside the 4384 // do_bit() method we move the _finger to point to the 4385 // object currently being looked at. So, if we bail out, we 4386 // have definitely set _finger to something non-null. 4387 assert(_finger != NULL, "invariant"); 4388 4389 // Region iteration was actually aborted. So now _finger 4390 // points to the address of the object we last scanned. If we 4391 // leave it there, when we restart this task, we will rescan 4392 // the object. It is easy to avoid this. We move the finger by 4393 // enough to point to the next possible object header (the 4394 // bitmap knows by how much we need to move it as it knows its 4395 // granularity). 4396 assert(_finger < _region_limit, "invariant"); 4397 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4398 // Check if bitmap iteration was aborted while scanning the last object 4399 if (new_finger >= _region_limit) { 4400 giveup_current_region(); 4401 } else { 4402 move_finger_to(new_finger); 4403 } 4404 } 4405 } 4406 // At this point we have either completed iterating over the 4407 // region we were holding on to, or we have aborted. 
4408
4409 // We then partially drain the local queue and the global stack.
4410 // (Do we really need this?)
4411 drain_local_queue(true);
4412 drain_global_stack(true);
4413
4414 // Read the note on the claim_region() method on why it might
4415 // return NULL with potentially more regions available for
4416 // claiming and why we have to check out_of_regions() to determine
4417 // whether we're done or not.
4418 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4419 // We are going to try to claim a new region. We should have
4420 // given up on the previous one.
4421 // Separated the asserts so that we know which one fires.
4422 assert(_curr_region == NULL, "invariant");
4423 assert(_finger == NULL, "invariant");
4424 assert(_region_limit == NULL, "invariant");
4425 if (_cm->verbose_low()) {
4426 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4427 }
4428 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4429 if (claimed_region != NULL) {
4430 // Yes, we managed to claim one
4431 statsOnly( ++_regions_claimed );
4432
4433 if (_cm->verbose_low()) {
4434 gclog_or_tty->print_cr("[%u] we successfully claimed "
4435 "region "PTR_FORMAT,
4436 _worker_id, p2i(claimed_region));
4437 }
4438
4439 setup_for_region(claimed_region);
4440 assert(_curr_region == claimed_region, "invariant");
4441 }
4442 // It is important to call the regular clock here. It might take
4443 // a while to claim a region if, for example, we hit a large
4444 // block of empty regions. So we need to call the regular clock
4445 // method once round the loop to make sure it's called
4446 // frequently enough.
4447 regular_clock_call();
4448 }
4449
4450 if (!has_aborted() && _curr_region == NULL) {
4451 assert(_cm->out_of_regions(),
4452 "at this point we should be out of regions");
4453 }
4454 } while (_curr_region != NULL && !has_aborted());
4455
4456 if (!has_aborted()) {
4457 // We cannot check whether the global stack is empty, since other
4458 // tasks might be pushing objects to it concurrently.
4459 assert(_cm->out_of_regions(),
4460 "at this point we should be out of regions");
4461
4462 if (_cm->verbose_low()) {
4463 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4464 }
4465
4466 // Try to reduce the number of available SATB buffers so that
4467 // remark has less work to do.
4468 drain_satb_buffers();
4469 }
4470
4471 // Since we've done everything else, we can now totally drain the
4472 // local queue and global stack.
4473 drain_local_queue(false);
4474 drain_global_stack(false);
4475
4476 // Attempt at work stealing from other tasks' queues.
4477 if (do_stealing && !has_aborted()) {
4478 // We have not aborted. This means that we have finished all that
4479 // we could. Let's try to do some stealing...
4480
4481 // We cannot check whether the global stack is empty, since other
4482 // tasks might be pushing objects to it concurrently.
4483 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4484 "only way to reach here"); 4485 4486 if (_cm->verbose_low()) { 4487 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4488 } 4489 4490 while (!has_aborted()) { 4491 oop obj; 4492 statsOnly( ++_steal_attempts ); 4493 4494 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4495 if (_cm->verbose_medium()) { 4496 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4497 _worker_id, p2i((void*) obj)); 4498 } 4499 4500 statsOnly( ++_steals ); 4501 4502 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4503 "any stolen object should be marked"); 4504 scan_object(obj); 4505 4506 // And since we're towards the end, let's totally drain the 4507 // local queue and global stack. 4508 drain_local_queue(false); 4509 drain_global_stack(false); 4510 } else { 4511 break; 4512 } 4513 } 4514 } 4515 4516 // If we are about to wrap up and go into termination, check if we 4517 // should raise the overflow flag. 4518 if (do_termination && !has_aborted()) { 4519 if (_cm->force_overflow()->should_force()) { 4520 _cm->set_has_overflown(); 4521 regular_clock_call(); 4522 } 4523 } 4524 4525 // We still haven't aborted. Now, let's try to get into the 4526 // termination protocol. 4527 if (do_termination && !has_aborted()) { 4528 // We cannot check whether the global stack is empty, since other 4529 // tasks might be concurrently pushing objects on it. 4530 // Separated the asserts so that we know which one fires. 4531 assert(_cm->out_of_regions(), "only way to reach here"); 4532 assert(_task_queue->size() == 0, "only way to reach here"); 4533 4534 if (_cm->verbose_low()) { 4535 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4536 } 4537 4538 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4539 4540 // The CMTask class also extends the TerminatorTerminator class, 4541 // hence its should_exit_termination() method will also decide 4542 // whether to exit the termination protocol or not. 4543 bool finished = (is_serial || 4544 _cm->terminator()->offer_termination(this)); 4545 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4546 _termination_time_ms += 4547 termination_end_time_ms - _termination_start_time_ms; 4548 4549 if (finished) { 4550 // We're all done. 4551 4552 if (_worker_id == 0) { 4553 // let's allow task 0 to do this 4554 if (concurrent()) { 4555 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4556 // we need to set this to false before the next 4557 // safepoint. This way we ensure that the marking phase 4558 // doesn't observe any more heap expansions. 4559 _cm->clear_concurrent_marking_in_progress(); 4560 } 4561 } 4562 4563 // We can now guarantee that the global stack is empty, since 4564 // all other tasks have finished. We separated the guarantees so 4565 // that, if a condition is false, we can immediately find out 4566 // which one. 4567 guarantee(_cm->out_of_regions(), "only way to reach here"); 4568 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4569 guarantee(_task_queue->size() == 0, "only way to reach here"); 4570 guarantee(!_cm->has_overflown(), "only way to reach here"); 4571 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4572 4573 if (_cm->verbose_low()) { 4574 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4575 } 4576 } else { 4577 // Apparently there's more work to do. Let's abort this task. It 4578 // will restart it and we can hopefully find more things to do. 
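// (The restart is done by our caller; e.g. the concurrent marking
// task's work() method loops and re-invokes do_marking_step() while
// this task has aborted but marking as a whole has not.)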
4579 4580 if (_cm->verbose_low()) { 4581 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4582 _worker_id); 4583 } 4584 4585 set_has_aborted(); 4586 statsOnly( ++_aborted_termination ); 4587 } 4588 } 4589 4590 // Mainly for debugging purposes to make sure that a pointer to the 4591 // closure which was statically allocated in this frame doesn't 4592 // escape it by accident. 4593 set_cm_oop_closure(NULL); 4594 double end_time_ms = os::elapsedVTime() * 1000.0; 4595 double elapsed_time_ms = end_time_ms - _start_time_ms; 4596 // Update the step history. 4597 _step_times_ms.add(elapsed_time_ms); 4598 4599 if (has_aborted()) { 4600 // The task was aborted for some reason. 4601 4602 statsOnly( ++_aborted ); 4603 4604 if (_has_timed_out) { 4605 double diff_ms = elapsed_time_ms - _time_target_ms; 4606 // Keep statistics of how well we did with respect to hitting 4607 // our target only if we actually timed out (if we aborted for 4608 // other reasons, then the results might get skewed). 4609 _marking_step_diffs_ms.add(diff_ms); 4610 } 4611 4612 if (_cm->has_overflown()) { 4613 // This is the interesting one. We aborted because a global 4614 // overflow was raised. This means we have to restart the 4615 // marking phase and start iterating over regions. However, in 4616 // order to do this we have to make sure that all tasks stop 4617 // what they are doing and re-initialize in a safe manner. We 4618 // will achieve this with the use of two barrier sync points. 4619 4620 if (_cm->verbose_low()) { 4621 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4622 } 4623 4624 if (!is_serial) { 4625 // We only need to enter the sync barrier if being called 4626 // from a parallel context 4627 _cm->enter_first_sync_barrier(_worker_id); 4628 4629 // When we exit this sync barrier we know that all tasks have 4630 // stopped doing marking work. So, it's now safe to 4631 // re-initialize our data structures. At the end of this method, 4632 // task 0 will clear the global data structures. 4633 } 4634 4635 statsOnly( ++_aborted_overflow ); 4636 4637 // We clear the local state of this task... 4638 clear_region_fields(); 4639 4640 if (!is_serial) { 4641 // ...and enter the second barrier. 4642 _cm->enter_second_sync_barrier(_worker_id); 4643 } 4644 // At this point, if we're during the concurrent phase of 4645 // marking, everything has been re-initialized and we're 4646 // ready to restart. 
4647 } 4648 4649 if (_cm->verbose_low()) { 4650 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4651 "elapsed = %1.2lfms <<<<<<<<<<", 4652 _worker_id, _time_target_ms, elapsed_time_ms); 4653 if (_cm->has_aborted()) { 4654 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4655 _worker_id); 4656 } 4657 } 4658 } else { 4659 if (_cm->verbose_low()) { 4660 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4661 "elapsed = %1.2lfms <<<<<<<<<<", 4662 _worker_id, _time_target_ms, elapsed_time_ms); 4663 } 4664 } 4665 4666 _claimed = false; 4667 } 4668 4669 CMTask::CMTask(uint worker_id, 4670 ConcurrentMark* cm, 4671 size_t* marked_bytes, 4672 BitMap* card_bm, 4673 CMTaskQueue* task_queue, 4674 CMTaskQueueSet* task_queues) 4675 : _g1h(G1CollectedHeap::heap()), 4676 _worker_id(worker_id), _cm(cm), 4677 _claimed(false), 4678 _nextMarkBitMap(NULL), _hash_seed(17), 4679 _task_queue(task_queue), 4680 _task_queues(task_queues), 4681 _cm_oop_closure(NULL), 4682 _marked_bytes_array(marked_bytes), 4683 _card_bm(card_bm) { 4684 guarantee(task_queue != NULL, "invariant"); 4685 guarantee(task_queues != NULL, "invariant"); 4686 4687 statsOnly( _clock_due_to_scanning = 0; 4688 _clock_due_to_marking = 0 ); 4689 4690 _marking_step_diffs_ms.add(0.5); 4691 } 4692 4693 // These are formatting macros that are used below to ensure 4694 // consistent formatting. The *_H_* versions are used to format the 4695 // header for a particular value and they should be kept consistent 4696 // with the corresponding macro. Also note that most of the macros add 4697 // the necessary white space (as a prefix) which makes them a bit 4698 // easier to compose. 4699 4700 // All the output lines are prefixed with this string to be able to 4701 // identify them easily in a large log file. 4702 #define G1PPRL_LINE_PREFIX "###" 4703 4704 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4705 #ifdef _LP64 4706 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4707 #else // _LP64 4708 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4709 #endif // _LP64 4710 4711 // For per-region info 4712 #define G1PPRL_TYPE_FORMAT " %-4s" 4713 #define G1PPRL_TYPE_H_FORMAT " %4s" 4714 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4715 #define G1PPRL_BYTE_H_FORMAT " %9s" 4716 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4717 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4718 4719 // For summary info 4720 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4721 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4722 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4723 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4724 4725 G1PrintRegionLivenessInfoClosure:: 4726 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4727 : _out(out), 4728 _total_used_bytes(0), _total_capacity_bytes(0), 4729 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4730 _hum_used_bytes(0), _hum_capacity_bytes(0), 4731 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4732 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4733 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4734 MemRegion g1_reserved = g1h->g1_reserved(); 4735 double now = os::elapsedTime(); 4736 4737 // Print the header of the output. 
4738 _out->cr();
4739 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4740 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4741 G1PPRL_SUM_ADDR_FORMAT("reserved")
4742 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4743 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4744 HeapRegion::GrainBytes);
4745 _out->print_cr(G1PPRL_LINE_PREFIX);
4746 _out->print_cr(G1PPRL_LINE_PREFIX
4747 G1PPRL_TYPE_H_FORMAT
4748 G1PPRL_ADDR_BASE_H_FORMAT
4749 G1PPRL_BYTE_H_FORMAT
4750 G1PPRL_BYTE_H_FORMAT
4751 G1PPRL_BYTE_H_FORMAT
4752 G1PPRL_DOUBLE_H_FORMAT
4753 G1PPRL_BYTE_H_FORMAT
4754 G1PPRL_BYTE_H_FORMAT,
4755 "type", "address-range",
4756 "used", "prev-live", "next-live", "gc-eff",
4757 "remset", "code-roots");
4758 _out->print_cr(G1PPRL_LINE_PREFIX
4759 G1PPRL_TYPE_H_FORMAT
4760 G1PPRL_ADDR_BASE_H_FORMAT
4761 G1PPRL_BYTE_H_FORMAT
4762 G1PPRL_BYTE_H_FORMAT
4763 G1PPRL_BYTE_H_FORMAT
4764 G1PPRL_DOUBLE_H_FORMAT
4765 G1PPRL_BYTE_H_FORMAT
4766 G1PPRL_BYTE_H_FORMAT,
4767 "", "",
4768 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4769 "(bytes)", "(bytes)");
4770 }
4771
4772 // It takes as a parameter a reference to one of the _hum_* fields, deduces
4773 // the corresponding value for a region in a humongous region
4774 // series (either the region size, or what's left if the _hum_* field
4775 // is < the region size), and updates the _hum_* field accordingly.
// (For example, with 1MB regions and a field initially holding 2.5MB,
// three successive calls return 1MB, 1MB and 0.5MB.)
4776 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4777 size_t bytes = 0;
4778 // The > 0 check is to deal with the prev and next live bytes which
4779 // could be 0.
4780 if (*hum_bytes > 0) {
4781 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4782 *hum_bytes -= bytes;
4783 }
4784 return bytes;
4785 }
4786
4787 // It deduces the values for a region in a humongous region series
4788 // from the _hum_* fields and updates those accordingly. It assumes
4789 // that the _hum_* fields have already been set up from the "starts
4790 // humongous" region and we visit the regions in address order.
4791 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4792 size_t* capacity_bytes,
4793 size_t* prev_live_bytes,
4794 size_t* next_live_bytes) {
4795 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4796 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4797 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4798 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4799 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4800 }
4801
4802 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4803 const char* type = "";
4804 HeapWord* bottom = r->bottom();
4805 HeapWord* end = r->end();
4806 size_t capacity_bytes = r->capacity();
4807 size_t used_bytes = r->used();
4808 size_t prev_live_bytes = r->live_bytes();
4809 size_t next_live_bytes = r->next_live_bytes();
4810 double gc_eff = r->gc_efficiency();
4811 size_t remset_bytes = r->rem_set()->mem_size();
4812 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4813
4814 if (r->used() == 0) {
4815 type = "FREE";
4816 } else if (r->is_survivor()) {
4817 type = "SURV";
4818 } else if (r->is_young()) {
4819 type = "EDEN";
4820 } else if (r->startsHumongous()) {
4821 type = "HUMS";
4822
4823 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4824 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4825 "they should have been zeroed after the last time we used them");
4826 // Set up the _hum_* fields.
4827 _hum_capacity_bytes = capacity_bytes; 4828 _hum_used_bytes = used_bytes; 4829 _hum_prev_live_bytes = prev_live_bytes; 4830 _hum_next_live_bytes = next_live_bytes; 4831 get_hum_bytes(&used_bytes, &capacity_bytes, 4832 &prev_live_bytes, &next_live_bytes); 4833 end = bottom + HeapRegion::GrainWords; 4834 } else if (r->continuesHumongous()) { 4835 type = "HUMC"; 4836 get_hum_bytes(&used_bytes, &capacity_bytes, 4837 &prev_live_bytes, &next_live_bytes); 4838 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4839 } else { 4840 type = "OLD"; 4841 } 4842 4843 _total_used_bytes += used_bytes; 4844 _total_capacity_bytes += capacity_bytes; 4845 _total_prev_live_bytes += prev_live_bytes; 4846 _total_next_live_bytes += next_live_bytes; 4847 _total_remset_bytes += remset_bytes; 4848 _total_strong_code_roots_bytes += strong_code_roots_bytes; 4849 4850 // Print a line for this particular region. 4851 _out->print_cr(G1PPRL_LINE_PREFIX 4852 G1PPRL_TYPE_FORMAT 4853 G1PPRL_ADDR_BASE_FORMAT 4854 G1PPRL_BYTE_FORMAT 4855 G1PPRL_BYTE_FORMAT 4856 G1PPRL_BYTE_FORMAT 4857 G1PPRL_DOUBLE_FORMAT 4858 G1PPRL_BYTE_FORMAT 4859 G1PPRL_BYTE_FORMAT, 4860 type, p2i(bottom), p2i(end), 4861 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 4862 remset_bytes, strong_code_roots_bytes); 4863 4864 return false; 4865 } 4866 4867 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4868 // add static memory usages to remembered set sizes 4869 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 4870 // Print the footer of the output. 4871 _out->print_cr(G1PPRL_LINE_PREFIX); 4872 _out->print_cr(G1PPRL_LINE_PREFIX 4873 " SUMMARY" 4874 G1PPRL_SUM_MB_FORMAT("capacity") 4875 G1PPRL_SUM_MB_PERC_FORMAT("used") 4876 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4877 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4878 G1PPRL_SUM_MB_FORMAT("remset") 4879 G1PPRL_SUM_MB_FORMAT("code-roots"), 4880 bytes_to_mb(_total_capacity_bytes), 4881 bytes_to_mb(_total_used_bytes), 4882 perc(_total_used_bytes, _total_capacity_bytes), 4883 bytes_to_mb(_total_prev_live_bytes), 4884 perc(_total_prev_live_bytes, _total_capacity_bytes), 4885 bytes_to_mb(_total_next_live_bytes), 4886 perc(_total_next_live_bytes, _total_capacity_bytes), 4887 bytes_to_mb(_total_remset_bytes), 4888 bytes_to_mb(_total_strong_code_roots_bytes)); 4889 _out->cr(); 4890 }
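// A typical way to drive G1PrintRegionLivenessInfoClosure (a minimal
// sketch for illustration; the actual call sites live elsewhere, e.g.
// in the cleanup pause code): the constructor prints the header,
// iterating over the heap prints one line per region, and the
// destructor prints the summary footer.
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);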