/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
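  // Each bitmap bit covers (1 << _shifter) heap words, so marked bits can
  // only occur at addresses aligned to that granularity.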
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
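  // Clearing proceeds below in chunks of M / HeapWordSize words (1 MB of heap)
  // so that, when _may_yield is set, the yield check runs at bounded intervals.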
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _suspendible(suspendible), _hrclaimer(n_workers) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  // Clamp the range to the part of the heap covered by the bitmap.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
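  // Claim the range [start, next_index) and copy the elements in. The lock
  // taken above serializes all pushers and poppers, so no other thread can
  // observe the claimed slots before they are filled.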
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
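  // If the scan was aborted, claim_next() may have stopped handing out
  // regions early, so full consumption can only be asserted when it wasn't.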
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
                                          (double) os::processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
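    // scale_parallel_threads() computes MAX2((n + 2) / 4, 1U); for example,
    // ParallelGCThreads = 8 yields (8 + 2) / 4 = 2 concurrent marking threads.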
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                  CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // We can use the region's orig_end() for the limit and the comparison value
    // as it always contains the "real" end of the region that never changes and
    // has no side effects.
    // Due to the latter, there can also be no problem with the compiler generating
    // reloads of the orig_end() call.
    HeapWord* end = r->orig_end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->is_continues_humongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp();
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  bool barrier_aborted;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_second_overflow_barrier_sync.enter();
  }

  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public StackObj {
  GCTraceTimeImpl _gc_trace_time;
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->is_continues_humongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->is_starts_humongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrm_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrm_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
1638 // Sets the bits corresponding to the interval [NTAMS, top] 1639 // (which contains the implicitly live objects) in the 1640 // card liveness bitmap. Also sets the bit for each region, 1641 // containing live data, in the region liveness bitmap. 1642 1643 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1644 public: 1645 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1646 BitMap* region_bm, 1647 BitMap* card_bm) : 1648 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1649 1650 bool doHeapRegion(HeapRegion* hr) { 1651 1652 if (hr->is_continues_humongous()) { 1653 // We will ignore these here and process them when their 1654 // associated "starts humongous" region is processed (see 1655 // set_bit_for_heap_region()). Note that we cannot rely on their 1656 // associated "starts humongous" region to have their bit set to 1657 // 1 since, due to the region chunking in the parallel region 1658 // iteration, a "continues humongous" region might be visited 1659 // before its associated "starts humongous". 1660 return false; 1661 } 1662 1663 HeapWord* ntams = hr->next_top_at_mark_start(); 1664 HeapWord* top = hr->top(); 1665 1666 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1667 1668 // Mark the allocated-since-marking portion... 1669 if (ntams < top) { 1670 // This definitely means the region has live objects. 1671 set_bit_for_region(hr); 1672 1673 // Now set the bits in the card bitmap for [ntams, top) 1674 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1675 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1676 1677 // Note: if we're looking at the last region in heap - top 1678 // could be actually just beyond the end of the heap; end_idx 1679 // will then correspond to a (non-existent) card that is also 1680 // just beyond the heap. 
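// (Editor's sketch, illustrative only: assuming the usual card geometry of
// 2^CardTableModRefBS::card_shift == 512 byte cards, the card bitmap index
// math used here is essentially
//
//   idx = pointer_delta(addr, heap_start) >> (card_shift - LogHeapWordSize);
//
// i.e. the word offset into the heap divided by the words per card, which
// is why an unaligned top needs the increment below to cover its last,
// partially filled card.)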
1681     if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1682       // end of object is not card aligned - increment to cover
1683       // all the cards spanned by the object
1684       end_idx += 1;
1685     }
1686
1687     assert(end_idx <= _card_bm->size(),
1688            "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1689            end_idx, _card_bm->size());
1690     assert(start_idx < _card_bm->size(),
1691            "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1692            start_idx, _card_bm->size());
1693
1694     _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1695   }
1696
1697   // Set the bit for the region if it contains live data
1698   if (hr->next_marked_bytes() > 0) {
1699     set_bit_for_region(hr);
1700   }
1701
1702   return false;
1703 }
1704 };
1705
1706 class G1ParFinalCountTask: public AbstractGangTask {
1707 protected:
1708   G1CollectedHeap* _g1h;
1709   ConcurrentMark* _cm;
1710   BitMap* _actual_region_bm;
1711   BitMap* _actual_card_bm;
1712
1713   uint _n_workers;
1714   HeapRegionClaimer _hrclaimer;
1715
1716 public:
1717   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1718     : AbstractGangTask("G1 final counting"),
1719       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1720       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1721       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1722   }
1723
1724   void work(uint worker_id) {
1725     assert(worker_id < _n_workers, "invariant");
1726
1727     FinalCountDataUpdateClosure final_update_cl(_g1h,
1728                                                 _actual_region_bm,
1729                                                 _actual_card_bm);
1730
1731     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1732   }
1733 };
1734
1735 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1736   G1CollectedHeap* _g1;
1737   size_t _freed_bytes;
1738   FreeRegionList* _local_cleanup_list;
1739   HeapRegionSetCount _old_regions_removed;
1740   HeapRegionSetCount _humongous_regions_removed;
1741   HRRSCleanupTask* _hrrs_cleanup_task;
1742
1743 public:
1744   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1745                              FreeRegionList* local_cleanup_list,
1746                              HRRSCleanupTask* hrrs_cleanup_task) :
1747     _g1(g1),
1748     _freed_bytes(0),
1749     _local_cleanup_list(local_cleanup_list),
1750     _old_regions_removed(),
1751     _humongous_regions_removed(),
1752     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1753
1754   size_t freed_bytes() { return _freed_bytes; }
1755   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1756   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1757
1758   bool doHeapRegion(HeapRegion *hr) {
1759     if (hr->is_continues_humongous() || hr->is_archive()) {
1760       return false;
1761     }
1762     // Regions are claimed exactly once via the HeapRegionClaimer during
1763     // the parallel iteration, so each region is processed here only once.
1764 _g1->reset_gc_time_stamps(hr); 1765 hr->note_end_of_marking(); 1766 1767 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1768 _freed_bytes += hr->used(); 1769 hr->set_containing_set(NULL); 1770 if (hr->is_humongous()) { 1771 assert(hr->is_starts_humongous(), "we should only see starts humongous"); 1772 _humongous_regions_removed.increment(1u, hr->capacity()); 1773 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1774 } else { 1775 _old_regions_removed.increment(1u, hr->capacity()); 1776 _g1->free_region(hr, _local_cleanup_list, true); 1777 } 1778 } else { 1779 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1780 } 1781 1782 return false; 1783 } 1784 }; 1785 1786 class G1ParNoteEndTask: public AbstractGangTask { 1787 friend class G1NoteEndOfConcMarkClosure; 1788 1789 protected: 1790 G1CollectedHeap* _g1h; 1791 FreeRegionList* _cleanup_list; 1792 HeapRegionClaimer _hrclaimer; 1793 1794 public: 1795 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1796 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1797 } 1798 1799 void work(uint worker_id) { 1800 FreeRegionList local_cleanup_list("Local Cleanup List"); 1801 HRRSCleanupTask hrrs_cleanup_task; 1802 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1803 &hrrs_cleanup_task); 1804 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1805 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1806 1807 // Now update the lists 1808 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1809 { 1810 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1811 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1812 1813 // If we iterate over the global cleanup list at the end of 1814 // cleanup to do this printing we will not guarantee to only 1815 // generate output for the newly-reclaimed regions (the list 1816 // might not be empty at the beginning of cleanup; we might 1817 // still be working on its previous contents). So we do the 1818 // printing here, before we append the new regions to the global 1819 // cleanup list. 
1820 1821 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1822 if (hr_printer->is_active()) { 1823 FreeRegionListIterator iter(&local_cleanup_list); 1824 while (iter.more_available()) { 1825 HeapRegion* hr = iter.get_next(); 1826 hr_printer->cleanup(hr); 1827 } 1828 } 1829 1830 _cleanup_list->add_ordered(&local_cleanup_list); 1831 assert(local_cleanup_list.is_empty(), "post-condition"); 1832 1833 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1834 } 1835 } 1836 }; 1837 1838 class G1ParScrubRemSetTask: public AbstractGangTask { 1839 protected: 1840 G1RemSet* _g1rs; 1841 BitMap* _region_bm; 1842 BitMap* _card_bm; 1843 HeapRegionClaimer _hrclaimer; 1844 1845 public: 1846 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1847 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1848 } 1849 1850 void work(uint worker_id) { 1851 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1852 } 1853 1854 }; 1855 1856 void ConcurrentMark::cleanup() { 1857 // world is stopped at this checkpoint 1858 assert(SafepointSynchronize::is_at_safepoint(), 1859 "world should be stopped"); 1860 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1861 1862 // If a full collection has happened, we shouldn't do this. 1863 if (has_aborted()) { 1864 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1865 return; 1866 } 1867 1868 g1h->verify_region_sets_optional(); 1869 1870 if (VerifyDuringGC) { 1871 HandleMark hm; // handle scope 1872 g1h->prepare_for_verify(); 1873 Universe::verify(VerifyOption_G1UsePrevMarking, 1874 " VerifyDuringGC:(before)"); 1875 } 1876 g1h->check_bitmaps("Cleanup Start"); 1877 1878 G1CollectorPolicy* g1p = g1h->g1_policy(); 1879 g1p->record_concurrent_mark_cleanup_start(); 1880 1881 double start = os::elapsedTime(); 1882 1883 HeapRegionRemSet::reset_for_cleanup_tasks(); 1884 1885 // Do counting once more with the world stopped for good measure. 1886 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1887 1888 g1h->workers()->run_task(&g1_par_count_task); 1889 1890 if (VerifyDuringGC) { 1891 // Verify that the counting data accumulated during marking matches 1892 // that calculated by walking the marking bitmap. 1893 1894 // Bitmaps to hold expected values 1895 BitMap expected_region_bm(_region_bm.size(), true); 1896 BitMap expected_card_bm(_card_bm.size(), true); 1897 1898 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1899 &_region_bm, 1900 &_card_bm, 1901 &expected_region_bm, 1902 &expected_card_bm); 1903 1904 g1h->workers()->run_task(&g1_par_verify_task); 1905 1906 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1907 } 1908 1909 size_t start_used_bytes = g1h->used(); 1910 g1h->collector_state()->set_mark_in_progress(false); 1911 1912 double count_end = os::elapsedTime(); 1913 double this_final_counting_time = (count_end - start); 1914 _total_counting_time += this_final_counting_time; 1915 1916 if (G1PrintRegionLivenessInfo) { 1917 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1918 _g1h->heap_region_iterate(&cl); 1919 } 1920 1921 // Install newly created mark bitMap as "prev". 1922 swapMarkBitMaps(); 1923 1924 g1h->reset_gc_time_stamp(); 1925 1926 uint n_workers = _g1h->workers()->active_workers(); 1927 1928 // Note end of marking in all heap regions. 
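// (Editor's note, illustrative: the pattern used throughout cleanup() is to
// wrap a HeapRegionClosure in an AbstractGangTask and fan it out over the
// worker gang, in sketch form:
//
//   SomeHeapRegionTask task(g1h, ...);   // hypothetical AbstractGangTask subclass
//   g1h->workers()->run_task(&task);     // blocks until all workers finish
//
// with a HeapRegionClaimer sized to the number of workers guaranteeing that
// each region is claimed and processed exactly once.)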
1929   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1930   g1h->workers()->run_task(&g1_par_note_end_task);
1931   g1h->check_gc_time_stamps();
1932
1933   if (!cleanup_list_is_empty()) {
1934     // The cleanup list is not empty, so we'll have to process it
1935     // concurrently. Notify anyone else that might be wanting free
1936     // regions that there will be more free regions coming soon.
1937     g1h->set_free_regions_coming();
1938   }
1939
1940   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1941   // call below, since it affects the metric by which we sort the heap regions.
1942   if (G1ScrubRemSets) {
1943     double rs_scrub_start = os::elapsedTime();
1944     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1945     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1946
1947     double rs_scrub_end = os::elapsedTime();
1948     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1949     _total_rs_scrub_time += this_rs_scrub_time;
1950   }
1951
1952   // this will also free any regions totally full of garbage objects,
1953   // and sort the regions.
1954   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1955
1956   // Statistics.
1957   double end = os::elapsedTime();
1958   _cleanup_times.add((end - start) * 1000.0);
1959
1960   if (G1Log::fine()) {
1961     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1962   }
1963
1964   // Clean up will have freed any regions completely full of garbage.
1965   // Update the soft reference policy with the new heap occupancy.
1966   Universe::update_heap_info_at_gc();
1967
1968   if (VerifyDuringGC) {
1969     HandleMark hm;  // handle scope
1970     g1h->prepare_for_verify();
1971     Universe::verify(VerifyOption_G1UsePrevMarking,
1972                      " VerifyDuringGC:(after)");
1973   }
1974
1975   g1h->check_bitmaps("Cleanup End");
1976
1977   g1h->verify_region_sets_optional();
1978
1979   // We need to make this be a "collection" so any collection pause that
1980   // races with it goes around and waits for completeCleanup to finish.
1981   g1h->increment_total_collections();
1982
1983   // Clean out dead classes and update Metaspace sizes.
1984   if (ClassUnloadingWithConcurrentMark) {
1985     ClassLoaderDataGraph::purge();
1986   }
1987   MetaspaceGC::compute_new_size();
1988
1989   // We reclaimed old regions so we should calculate the sizes to make
1990   // sure we update the old gen/space data.
1991   g1h->g1mm()->update_sizes();
1992   g1h->allocation_context_stats().update_after_mark();
1993
1994   g1h->trace_heap_after_concurrent_cycle();
1995 }
1996
1997 void ConcurrentMark::completeCleanup() {
1998   if (has_aborted()) return;
1999
2000   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2001
2002   _cleanup_list.verify_optional();
2003   FreeRegionList tmp_free_list("Tmp Free List");
2004
2005   if (G1ConcRegionFreeingVerbose) {
2006     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2007                            "cleanup list has %u entries",
2008                            _cleanup_list.length());
2009   }
2010
2011   // No one else should be accessing the _cleanup_list at this point,
2012   // so it is not necessary to take any locks
2013   while (!_cleanup_list.is_empty()) {
2014     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2015     assert(hr != NULL, "Got NULL from a non-empty list");
2016     hr->par_clear();
2017     tmp_free_list.add_ordered(hr);
2018
2019     // Instead of adding one region at a time to the secondary_free_list,
2020     // we accumulate them in the local list and move them a few at a
2021     // time. This also cuts down on the number of notify_all() calls
2022     // we do during this process. We'll also append the local list when
2023     // _cleanup_list is empty (which means we just removed the last
2024     // region from the _cleanup_list).
2025     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2026         _cleanup_list.is_empty()) {
2027       if (G1ConcRegionFreeingVerbose) {
2028         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2029                                "appending %u entries to the secondary_free_list, "
2030                                "cleanup list still has %u entries",
2031                                tmp_free_list.length(),
2032                                _cleanup_list.length());
2033       }
2034
2035       {
2036         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2037         g1h->secondary_free_list_add(&tmp_free_list);
2038         SecondaryFreeList_lock->notify_all();
2039       }
2040 #ifndef PRODUCT
2041       if (G1StressConcRegionFreeing) {
2042         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2043           os::sleep(Thread::current(), (jlong) 1, false);
2044         }
2045       }
2046 #endif
2047     }
2048   }
2049   assert(tmp_free_list.is_empty(), "post-condition");
2050 }
2051
2052 // Supporting Object and Oop closures for reference discovery
2053 // and processing during marking
2054
2055 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2056   HeapWord* addr = (HeapWord*)obj;
2057   return addr != NULL &&
2058          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2059 }
2060
2061 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2062 // Uses the CMTask associated with a worker thread (for serial reference
2063 // processing the CMTask for worker 0 is used) to preserve (mark) and
2064 // trace referent objects.
2065 //
2066 // Using the CMTask and embedded local queues avoids having the worker
2067 // threads operating on the global mark stack. This reduces the risk
2068 // of overflowing the stack - which we would rather avoid at this late
2069 // stage. Also using the tasks' local queues removes the potential
2070 // of the workers interfering with each other that could occur if
2071 // operating on the global stack.
2072
2073 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2074   ConcurrentMark* _cm;
2075   CMTask* _task;
2076   int _ref_counter_limit;
2077   int _ref_counter;
2078   bool _is_serial;
2079 public:
2080   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2081     _cm(cm), _task(task), _is_serial(is_serial),
2082     _ref_counter_limit(G1RefProcDrainInterval) {
2083     assert(_ref_counter_limit > 0, "sanity");
2084     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2085     _ref_counter = _ref_counter_limit;
2086   }
2087
2088   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2089   virtual void do_oop(      oop* p) { do_oop_work(p); }
2090
2091   template <class T> void do_oop_work(T* p) {
2092     if (!_cm->has_overflown()) {
2093       oop obj = oopDesc::load_decode_heap_oop(p);
2094       if (_cm->verbose_high()) {
2095         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2096                                "*" PTR_FORMAT " = " PTR_FORMAT,
2097                                _task->worker_id(), p2i(p), p2i((void*) obj));
2098       }
2099
2100       _task->deal_with_reference(obj);
2101       _ref_counter--;
2102
2103       if (_ref_counter == 0) {
2104         // We have dealt with _ref_counter_limit references, pushing them
2105         // and objects reachable from them on to the local stack (and
2106         // possibly the global stack). Call CMTask::do_marking_step() to
2107         // process these entries.
2108         //
2109         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2110         // there's nothing more to do (i.e. we're done with the entries that
2111         // were pushed as a result of the CMTask::deal_with_reference() calls
2112         // above) or we overflow.
2113         //
2114         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2115         // flag while there may still be some work to do. (See the comment at
2116         // the beginning of CMTask::do_marking_step() for those conditions -
2117         // one of which is reaching the specified time target.) It is only
2118         // when CMTask::do_marking_step() returns without setting the
2119         // has_aborted() flag that the marking step has completed.
2120         do {
2121           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2122           _task->do_marking_step(mark_step_duration_ms,
2123                                  false /* do_termination */,
2124                                  _is_serial);
2125         } while (_task->has_aborted() && !_cm->has_overflown());
2126         _ref_counter = _ref_counter_limit;
2127       }
2128     } else {
2129       if (_cm->verbose_high()) {
2130         gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2131       }
2132     }
2133   }
2134 };
2135
2136 // 'Drain' oop closure used by both serial and parallel reference processing.
2137 // Uses the CMTask associated with a given worker thread (for serial
2138 // reference processing the CMTask for worker 0 is used). Calls the
2139 // do_marking_step routine, with an unbelievably large timeout value,
2140 // to drain the marking data structures of the remaining entries
2141 // added by the 'keep alive' oop closure above.
2142
2143 class G1CMDrainMarkingStackClosure: public VoidClosure {
2144   ConcurrentMark* _cm;
2145   CMTask* _task;
2146   bool _is_serial;
2147 public:
2148   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2149     _cm(cm), _task(task), _is_serial(is_serial) {
2150     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2151   }
2152
2153   void do_void() {
2154     do {
2155       if (_cm->verbose_high()) {
2156         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2157                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2158       }
2159
2160       // We call CMTask::do_marking_step() to completely drain the local
2161       // and global marking stacks of entries pushed by the 'keep alive'
2162       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2163       //
2164       // CMTask::do_marking_step() is called in a loop, which we'll exit
2165       // if there's nothing more to do (i.e. we've completely drained the
2166       // entries that were pushed as a result of applying the 'keep alive'
2167       // closure to the entries on the discovered ref lists) or we overflow
2168       // the global marking stack.
2169       //
2170       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2171       // flag while there may still be some work to do. (See the comment at
2172       // the beginning of CMTask::do_marking_step() for those conditions -
2173       // one of which is reaching the specified time target.) It is only
2174       // when CMTask::do_marking_step() returns without setting the
2175       // has_aborted() flag that the marking step has completed.
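// (Editor's note, distilling the idiom described above - illustrative only,
// the real call follows immediately below:
//
//   do {
//     task->do_marking_step(timeout_ms, do_termination, is_serial);
//   } while (task->has_aborted() && !cm->has_overflown());
//
// i.e. re-enter do_marking_step() until it finishes without aborting, or
// until a global mark-stack overflow forces marking to be restarted.)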
2176 2177 _task->do_marking_step(1000000000.0 /* something very large */, 2178 true /* do_termination */, 2179 _is_serial); 2180 } while (_task->has_aborted() && !_cm->has_overflown()); 2181 } 2182 }; 2183 2184 // Implementation of AbstractRefProcTaskExecutor for parallel 2185 // reference processing at the end of G1 concurrent marking 2186 2187 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2188 private: 2189 G1CollectedHeap* _g1h; 2190 ConcurrentMark* _cm; 2191 WorkGang* _workers; 2192 uint _active_workers; 2193 2194 public: 2195 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2196 ConcurrentMark* cm, 2197 WorkGang* workers, 2198 uint n_workers) : 2199 _g1h(g1h), _cm(cm), 2200 _workers(workers), _active_workers(n_workers) { } 2201 2202 // Executes the given task using concurrent marking worker threads. 2203 virtual void execute(ProcessTask& task); 2204 virtual void execute(EnqueueTask& task); 2205 }; 2206 2207 class G1CMRefProcTaskProxy: public AbstractGangTask { 2208 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2209 ProcessTask& _proc_task; 2210 G1CollectedHeap* _g1h; 2211 ConcurrentMark* _cm; 2212 2213 public: 2214 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2215 G1CollectedHeap* g1h, 2216 ConcurrentMark* cm) : 2217 AbstractGangTask("Process reference objects in parallel"), 2218 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2219 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2220 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2221 } 2222 2223 virtual void work(uint worker_id) { 2224 ResourceMark rm; 2225 HandleMark hm; 2226 CMTask* task = _cm->task(worker_id); 2227 G1CMIsAliveClosure g1_is_alive(_g1h); 2228 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2229 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2230 2231 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2232 } 2233 }; 2234 2235 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2236 assert(_workers != NULL, "Need parallel worker threads."); 2237 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2238 2239 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2240 2241 // We need to reset the concurrency level before each 2242 // proxy task execution, so that the termination protocol 2243 // and overflow handling in CMTask::do_marking_step() knows 2244 // how many workers to wait for. 2245 _cm->set_concurrency(_active_workers); 2246 _workers->run_task(&proc_task_proxy); 2247 } 2248 2249 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2250 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2251 EnqueueTask& _enq_task; 2252 2253 public: 2254 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2255 AbstractGangTask("Enqueue reference objects in parallel"), 2256 _enq_task(enq_task) { } 2257 2258 virtual void work(uint worker_id) { 2259 _enq_task.work(worker_id); 2260 } 2261 }; 2262 2263 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2264 assert(_workers != NULL, "Need parallel worker threads."); 2265 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2266 2267 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2268 2269 // Not strictly necessary but... 2270 // 2271 // We need to reset the concurrency level before each 2272 // proxy task execution, so that the termination protocol 2273 // and overflow handling in CMTask::do_marking_step() knows 2274 // how many workers to wait for. 
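// (Editor's note, illustrative: the termination protocol in
// do_marking_step() sizes its barriers by this concurrency value, so every
// proxy execution follows the two-step sketch
//
//   _cm->set_concurrency(_active_workers);  // barrier expects N parties
//   _workers->run_task(&proxy);             // N workers enter/leave together
//
// and skipping the reset could leave workers waiting on a barrier sized for
// the wrong number of threads.)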
2275   _cm->set_concurrency(_active_workers);
2276   _workers->run_task(&enq_task_proxy);
2277 }
2278
2279 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2280   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2281 }
2282
2283 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2284   if (has_overflown()) {
2285     // Skip processing the discovered references if we have
2286     // overflown the global marking stack. Reference objects
2287     // only get discovered once so it is OK to not
2288     // de-populate the discovered reference lists. We could have,
2289     // but the only benefit would be that, when marking restarts,
2290     // fewer reference objects are discovered.
2291     return;
2292   }
2293
2294   ResourceMark rm;
2295   HandleMark hm;
2296
2297   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2298
2299   // Is alive closure.
2300   G1CMIsAliveClosure g1_is_alive(g1h);
2301
2302   // Inner scope to exclude the cleaning of the string and symbol
2303   // tables from the displayed time.
2304   {
2305     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2306
2307     ReferenceProcessor* rp = g1h->ref_processor_cm();
2308
2309     // See the comment in G1CollectedHeap::ref_processing_init()
2310     // about how reference processing currently works in G1.
2311
2312     // Set the soft reference policy
2313     rp->setup_policy(clear_all_soft_refs);
2314     assert(_markStack.isEmpty(), "mark stack should be empty");
2315
2316     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2317     // in serial reference processing. Note these closures are also
2318     // used for serially processing (by the current thread) the
2319     // JNI references during parallel reference processing.
2320     //
2321     // These closures do not need to synchronize with the worker
2322     // threads involved in parallel reference processing as these
2323     // instances are executed serially by the current thread (e.g.
2324     // reference processing is not multi-threaded and is thus
2325     // performed by the current thread instead of a gang worker).
2326     //
2327     // The gang tasks involved in parallel reference processing create
2328     // their own instances of these closures, which do their own
2329     // synchronization among themselves.
2330     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2331     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2332
2333     // We need at least one active thread. If reference processing
2334     // is not multi-threaded we use the current (VMThread) thread,
2335     // otherwise we use the work gang from the G1CollectedHeap and
2336     // we utilize all the worker threads we can.
2337     bool processing_is_mt = rp->processing_is_mt();
2338     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2339     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2340
2341     // Parallel processing task executor.
2342     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2343                                               g1h->workers(), active_workers);
2344     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2345
2346     // Set the concurrency level. The phase was already set prior to
2347     // executing the remark task.
2348     set_concurrency(active_workers);
2349
2350     // Set the degree of MT processing here. If the discovery was done MT,
2351     // the number of threads involved during discovery could differ from
2352     // the number of active workers.
This is OK as long as the discovered 2353 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2354 rp->set_active_mt_degree(active_workers); 2355 2356 // Process the weak references. 2357 const ReferenceProcessorStats& stats = 2358 rp->process_discovered_references(&g1_is_alive, 2359 &g1_keep_alive, 2360 &g1_drain_mark_stack, 2361 executor, 2362 g1h->gc_timer_cm()); 2363 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2364 2365 // The do_oop work routines of the keep_alive and drain_marking_stack 2366 // oop closures will set the has_overflown flag if we overflow the 2367 // global marking stack. 2368 2369 assert(_markStack.overflow() || _markStack.isEmpty(), 2370 "mark stack should be empty (unless it overflowed)"); 2371 2372 if (_markStack.overflow()) { 2373 // This should have been done already when we tried to push an 2374 // entry on to the global mark stack. But let's do it again. 2375 set_has_overflown(); 2376 } 2377 2378 assert(rp->num_q() == active_workers, "why not"); 2379 2380 rp->enqueue_discovered_references(executor); 2381 2382 rp->verify_no_references_recorded(); 2383 assert(!rp->discovery_enabled(), "Post condition"); 2384 } 2385 2386 if (has_overflown()) { 2387 // We can not trust g1_is_alive if the marking stack overflowed 2388 return; 2389 } 2390 2391 assert(_markStack.isEmpty(), "Marking should have completed"); 2392 2393 // Unload Klasses, String, Symbols, Code Cache, etc. 2394 { 2395 G1CMTraceTime trace("Unloading", G1Log::finer()); 2396 2397 if (ClassUnloadingWithConcurrentMark) { 2398 bool purged_classes; 2399 2400 { 2401 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2402 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2403 } 2404 2405 { 2406 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2407 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2408 } 2409 } 2410 2411 if (G1StringDedup::is_enabled()) { 2412 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2413 G1StringDedup::unlink(&g1_is_alive); 2414 } 2415 } 2416 } 2417 2418 void ConcurrentMark::swapMarkBitMaps() { 2419 CMBitMapRO* temp = _prevMarkBitMap; 2420 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2421 _nextMarkBitMap = (CMBitMap*) temp; 2422 } 2423 2424 // Closure for marking entries in SATB buffers. 2425 class CMSATBBufferClosure : public SATBBufferClosure { 2426 private: 2427 CMTask* _task; 2428 G1CollectedHeap* _g1h; 2429 2430 // This is very similar to CMTask::deal_with_reference, but with 2431 // more relaxed requirements for the argument, so this must be more 2432 // circumspect about treating the argument as an object. 2433 void do_entry(void* entry) const { 2434 _task->increment_refs_reached(); 2435 HeapRegion* hr = _g1h->heap_region_containing_raw(entry); 2436 if (entry < hr->next_top_at_mark_start()) { 2437 // Until we get here, we don't know whether entry refers to a valid 2438 // object; it could instead have been a stale reference. 
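// (Editor's note, illustrative: entries at or above NTAMS refer to objects
// allocated since marking started, which are implicitly live and need no
// explicit greying, so the filter reduces to
//
//   if ((HeapWord*)entry < hr->next_top_at_mark_start()) {
//     task->make_reference_grey(static_cast<oop>(entry), hr);
//   } // else: implicitly live or stale; skipping is correct under SATB
// )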
2439       oop obj = static_cast<oop>(entry);
2440       assert(obj->is_oop(true /* ignore mark word */),
2441              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2442       _task->make_reference_grey(obj, hr);
2443     }
2444   }
2445
2446 public:
2447   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2448     : _task(task), _g1h(g1h) { }
2449
2450   virtual void do_buffer(void** buffer, size_t size) {
2451     for (size_t i = 0; i < size; ++i) {
2452       do_entry(buffer[i]);
2453     }
2454   }
2455 };
2456
2457 class G1RemarkThreadsClosure : public ThreadClosure {
2458   CMSATBBufferClosure _cm_satb_cl;
2459   G1CMOopClosure _cm_cl;
2460   MarkingCodeBlobClosure _code_cl;
2461   int _thread_parity;
2462
2463 public:
2464   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2465     _cm_satb_cl(task, g1h),
2466     _cm_cl(g1h, g1h->concurrent_mark(), task),
2467     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2468     _thread_parity(Threads::thread_claim_parity()) {}
2469
2470   void do_thread(Thread* thread) {
2471     if (thread->is_Java_thread()) {
2472       if (thread->claim_oops_do(true, _thread_parity)) {
2473         JavaThread* jt = (JavaThread*)thread;
2474
2475         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2476         // however, the oops reachable from nmethods have very complex lifecycles:
2477         // * Alive if on the stack of an executing method
2478         // * Weakly reachable otherwise
2479         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2480         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2481         jt->nmethods_do(&_code_cl);
2482
2483         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2484       }
2485     } else if (thread->is_VM_thread()) {
2486       if (thread->claim_oops_do(true, _thread_parity)) {
2487         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2488       }
2489     }
2490   }
2491 };
2492
2493 class CMRemarkTask: public AbstractGangTask {
2494 private:
2495   ConcurrentMark* _cm;
2496 public:
2497   void work(uint worker_id) {
2498     // Since all available tasks are actually started, we should
2499     // only proceed if we're supposed to be active.
2500     if (worker_id < _cm->active_tasks()) {
2501       CMTask* task = _cm->task(worker_id);
2502       task->record_start_time();
2503       {
2504         ResourceMark rm;
2505         HandleMark hm;
2506
2507         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2508         Threads::threads_do(&threads_f);
2509       }
2510
2511       do {
2512         task->do_marking_step(1000000000.0 /* something very large */,
2513                               true /* do_termination */,
2514                               false /* is_serial */);
2515       } while (task->has_aborted() && !_cm->has_overflown());
2516       // If we overflow, then we do not want to restart. We instead
2517       // want to abort remark and do concurrent marking again.
2518       task->record_end_time();
2519     }
2520   }
2521
2522   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2523     AbstractGangTask("Par Remark"), _cm(cm) {
2524     _cm->terminator()->reset_for_reuse(active_workers);
2525   }
2526 };
2527
2528 void ConcurrentMark::checkpointRootsFinalWork() {
2529   ResourceMark rm;
2530   HandleMark hm;
2531   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2532
2533   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2534
2535   g1h->ensure_parsability(false);
2536
2537   // this is remark, so we'll use up all active threads
2538   uint active_workers = g1h->workers()->active_workers();
2539   set_concurrency_and_phase(active_workers, false /* concurrent */);
2540   // Leave _parallel_marking_threads at its
2541   // value originally calculated in the ConcurrentMark
2542   // constructor and pass values of the active workers
2543   // through the gang in the task.
2544
2545   {
2546     StrongRootsScope srs(active_workers);
2547
2548     CMRemarkTask remarkTask(this, active_workers);
2549     // We will start all available threads, even if we decide that the
2550     // active_workers will be fewer. The extra ones will just bail out
2551     // immediately.
2552     g1h->workers()->run_task(&remarkTask);
2553   }
2554
2555   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2556   guarantee(has_overflown() ||
2557             satb_mq_set.completed_buffers_num() == 0,
2558             "Invariant: has_overflown = %s, num buffers = %d",
2559             BOOL_TO_STR(has_overflown()),
2560             satb_mq_set.completed_buffers_num());
2561
2562   print_stats();
2563 }
2564
2565 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2566   // Note we are overriding the read-only view of the prev map here, via
2567   // the cast.
2568   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2569 }
2570
2571 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2572   _nextMarkBitMap->clearRange(mr);
2573 }
2574
2575 HeapRegion*
2576 ConcurrentMark::claim_region(uint worker_id) {
2577   // "checkpoint" the finger
2578   HeapWord* finger = _finger;
2579
2580   // _heap_end will not change underneath our feet; it only changes at
2581   // yield points.
2582   while (finger < _heap_end) {
2583     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2584
2585     // Note on how this code handles humongous regions. In the
2586     // normal case the finger will reach the start of a "starts
2587     // humongous" (SH) region. Its end will either be the end of the
2588     // last "continues humongous" (CH) region in the sequence, or the
2589     // standard end of the SH region (if the SH is the only region in
2590     // the sequence). That way claim_region() will skip over the CH
2591     // regions. However, there is a subtle race between a CM thread
2592     // executing this method and a mutator thread doing a humongous
2593     // object allocation. The two are not mutually exclusive as the CM
2594     // thread does not need to hold the Heap_lock when it gets
2595     // here. So there is a chance that claim_region() will come across
2596     // a free region that's in the process of becoming a SH or a CH
2597     // region. In the former case, it will either
2598     //   a) Miss the update to the region's end, in which case it will
2599     //      visit every subsequent CH region, will find their bitmaps
2600     //      empty, and do nothing, or
2601     //   b) Observe the update of the region's end (in which case
2602     //      it will skip the subsequent CH regions).
2603     // If it comes across a region that suddenly becomes CH, the
2604     // scenario will be similar to b). So, the race between
2605     // claim_region() and a humongous object allocation might force us
2606     // to do a bit of unnecessary work (due to some unnecessary bitmap
2607     // iterations) but it should not introduce any correctness issues.
2608     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2609
2610     // Above heap_region_containing_raw may return NULL as we always scan
2611     // until the end of the heap. In this case, just jump to the next region.
2612     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2613
2614     // Is the gap between reading the finger and doing the CAS too long?
2615     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2616     if (res == finger && curr_region != NULL) {
2617       // we succeeded
2618       HeapWord* bottom = curr_region->bottom();
2619       HeapWord* limit = curr_region->next_top_at_mark_start();
2620
2621       if (verbose_low()) {
2622         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2623                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2624                                "limit = " PTR_FORMAT,
2625                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2626       }
2627
2628       // notice that _finger == end cannot be guaranteed here since
2629       // someone else might have moved the finger even further
2630       assert(_finger >= end, "the finger should have moved forward");
2631
2632       if (verbose_low()) {
2633         gclog_or_tty->print_cr("[%u] we were successful with region = "
2634                                PTR_FORMAT, worker_id, p2i(curr_region));
2635       }
2636
2637       if (limit > bottom) {
2638         if (verbose_low()) {
2639           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2640                                  "returning it ", worker_id, p2i(curr_region));
2641         }
2642         return curr_region;
2643       } else {
2644         assert(limit == bottom,
2645                "the region limit should be at bottom");
2646         if (verbose_low()) {
2647           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2648                                  "returning NULL", worker_id, p2i(curr_region));
2649         }
2650         // we return NULL and the caller should try calling
2651         // claim_region() again.
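// (Editor's note, illustrative restatement of the claim idiom used above:
//
//   HeapWord* finger = _finger;                              // snapshot
//   HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
//   if (res == finger) { /* we own [finger, end) */ }
//   else               { finger = _finger; /* lost the race, retry */ }
//
// the CAS simultaneously claims the region and publishes the new finger, so
// losing threads simply reread _finger and try the next region.)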
2652 return NULL; 2653 } 2654 } else { 2655 assert(_finger > finger, "the finger should have moved forward"); 2656 if (verbose_low()) { 2657 if (curr_region == NULL) { 2658 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, " 2659 "global finger = " PTR_FORMAT ", " 2660 "our finger = " PTR_FORMAT, 2661 worker_id, p2i(_finger), p2i(finger)); 2662 } else { 2663 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2664 "global finger = " PTR_FORMAT ", " 2665 "our finger = " PTR_FORMAT, 2666 worker_id, p2i(_finger), p2i(finger)); 2667 } 2668 } 2669 2670 // read it again 2671 finger = _finger; 2672 } 2673 } 2674 2675 return NULL; 2676 } 2677 2678 #ifndef PRODUCT 2679 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2680 private: 2681 G1CollectedHeap* _g1h; 2682 const char* _phase; 2683 int _info; 2684 2685 public: 2686 VerifyNoCSetOops(const char* phase, int info = -1) : 2687 _g1h(G1CollectedHeap::heap()), 2688 _phase(phase), 2689 _info(info) 2690 { } 2691 2692 void operator()(oop obj) const { 2693 guarantee(obj->is_oop(), 2694 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2695 p2i(obj), _phase, _info); 2696 guarantee(!_g1h->obj_in_cs(obj), 2697 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2698 p2i(obj), _phase, _info); 2699 } 2700 }; 2701 2702 void ConcurrentMark::verify_no_cset_oops() { 2703 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2704 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2705 return; 2706 } 2707 2708 // Verify entries on the global mark stack 2709 _markStack.iterate(VerifyNoCSetOops("Stack")); 2710 2711 // Verify entries on the task queues 2712 for (uint i = 0; i < _max_worker_id; ++i) { 2713 CMTaskQueue* queue = _task_queues->queue(i); 2714 queue->iterate(VerifyNoCSetOops("Queue", i)); 2715 } 2716 2717 // Verify the global finger 2718 HeapWord* global_finger = finger(); 2719 if (global_finger != NULL && global_finger < _heap_end) { 2720 // The global finger always points to a heap region boundary. We 2721 // use heap_region_containing_raw() to get the containing region 2722 // given that the global finger could be pointing to a free region 2723 // which subsequently becomes continues humongous. If that 2724 // happens, heap_region_containing() will return the bottom of the 2725 // corresponding starts humongous region and the check below will 2726 // not hold any more. 2727 // Since we always iterate over all regions, we might get a NULL HeapRegion 2728 // here. 2729 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2730 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2731 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2732 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2733 } 2734 2735 // Verify the task fingers 2736 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2737 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2738 CMTask* task = _tasks[i]; 2739 HeapWord* task_finger = task->finger(); 2740 if (task_finger != NULL && task_finger < _heap_end) { 2741 // See above note on the global finger verification. 
2742       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2743       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2744                 !task_hr->in_collection_set(),
2745                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2746                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2747     }
2748   }
2749 }
2750 #endif // PRODUCT
2751
2752 // Aggregate the counting data that was constructed concurrently
2753 // with marking.
2754 class AggregateCountDataHRClosure: public HeapRegionClosure {
2755   G1CollectedHeap* _g1h;
2756   ConcurrentMark* _cm;
2757   CardTableModRefBS* _ct_bs;
2758   BitMap* _cm_card_bm;
2759   uint _max_worker_id;
2760
2761 public:
2762   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2763                               BitMap* cm_card_bm,
2764                               uint max_worker_id) :
2765     _g1h(g1h), _cm(g1h->concurrent_mark()),
2766     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2767     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2768
2769   bool doHeapRegion(HeapRegion* hr) {
2770     if (hr->is_continues_humongous()) {
2771       // We will ignore these here and process them when their
2772       // associated "starts humongous" region is processed.
2773       // Note that we cannot rely on their associated
2774       // "starts humongous" region to have their bit set to 1
2775       // since, due to the region chunking in the parallel region
2776       // iteration, a "continues humongous" region might be visited
2777       // before its associated "starts humongous".
2778       return false;
2779     }
2780
2781     HeapWord* start = hr->bottom();
2782     HeapWord* limit = hr->next_top_at_mark_start();
2783     HeapWord* end = hr->end();
2784
2785     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2786            "Preconditions not met - "
2787            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2788            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2789            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2790
2791     assert(hr->next_marked_bytes() == 0, "Precondition");
2792
2793     if (start == limit) {
2794       // NTAMS of this region has not been set so nothing to do.
2795       return false;
2796     }
2797
2798     // 'start' should be in the heap.
2799     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2800     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2801     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2802
2803     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2804     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2805     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2806
2807     // If ntams is not card aligned then we bump the card bitmap index
2808     // for limit so that we get all the cards spanned by
2809     // the object ending at ntams.
2810     // Note: if this is the last region in the heap then ntams
2811     // could be actually just beyond the end of the heap;
2812     // limit_idx will then correspond to a (non-existent) card
2813     // that is also outside the heap.
2814     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2815       limit_idx += 1;
2816     }
2817
2818     assert(limit_idx <= end_idx, "or else use atomics");
2819
2820     // Aggregate the "stripe" in the count data associated with hr.
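// (Editor's note, illustrative: conceptually the loop below computes
//
//   marked_bytes(hr) = sum over workers w of marked_bytes_array[w][hrm_index]
//   global card bm  |= union over workers w of task_card_bm[w],
//                      restricted to [start_idx, limit_idx)
//
// i.e. a plain reduction of the per-worker counting data into the global
// counters.)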
2821 uint hrm_index = hr->hrm_index(); 2822 size_t marked_bytes = 0; 2823 2824 for (uint i = 0; i < _max_worker_id; i += 1) { 2825 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2826 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2827 2828 // Fetch the marked_bytes in this region for task i and 2829 // add it to the running total for this region. 2830 marked_bytes += marked_bytes_array[hrm_index]; 2831 2832 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 2833 // into the global card bitmap. 2834 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2835 2836 while (scan_idx < limit_idx) { 2837 assert(task_card_bm->at(scan_idx) == true, "should be"); 2838 _cm_card_bm->set_bit(scan_idx); 2839 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2840 2841 // BitMap::get_next_one_offset() can handle the case when 2842 // its left_offset parameter is greater than its right_offset 2843 // parameter. It does, however, have an early exit if 2844 // left_offset == right_offset. So let's limit the value 2845 // passed in for left offset here. 2846 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2847 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2848 } 2849 } 2850 2851 // Update the marked bytes for this region. 2852 hr->add_to_marked_bytes(marked_bytes); 2853 2854 // Next heap region 2855 return false; 2856 } 2857 }; 2858 2859 class G1AggregateCountDataTask: public AbstractGangTask { 2860 protected: 2861 G1CollectedHeap* _g1h; 2862 ConcurrentMark* _cm; 2863 BitMap* _cm_card_bm; 2864 uint _max_worker_id; 2865 uint _active_workers; 2866 HeapRegionClaimer _hrclaimer; 2867 2868 public: 2869 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2870 ConcurrentMark* cm, 2871 BitMap* cm_card_bm, 2872 uint max_worker_id, 2873 uint n_workers) : 2874 AbstractGangTask("Count Aggregation"), 2875 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2876 _max_worker_id(max_worker_id), 2877 _active_workers(n_workers), 2878 _hrclaimer(_active_workers) { 2879 } 2880 2881 void work(uint worker_id) { 2882 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2883 2884 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2885 } 2886 }; 2887 2888 2889 void ConcurrentMark::aggregate_count_data() { 2890 uint n_workers = _g1h->workers()->active_workers(); 2891 2892 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2893 _max_worker_id, n_workers); 2894 2895 _g1h->workers()->run_task(&g1_par_agg_task); 2896 } 2897 2898 // Clear the per-worker arrays used to store the per-region counting data 2899 void ConcurrentMark::clear_all_count_data() { 2900 // Clear the global card bitmap - it will be filled during 2901 // liveness count aggregation (during remark) and the 2902 // final counting task. 2903 _card_bm.clear(); 2904 2905 // Clear the global region bitmap - it will be filled as part 2906 // of the final counting task. 
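// (Editor's note, illustrative: the counting data cleared here is laid out
// per worker - for each worker w there is a card bitmap and a per-region
// byte count,
//
//   BitMap* task_card_bm       = count_card_bitmap_for(w);        // one bit per card
//   size_t* marked_bytes_array = count_marked_bytes_array_for(w); // max_regions entries
//
// and the loop below simply zeroes each pair after the two global bitmaps.)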
2907 _region_bm.clear(); 2908 2909 uint max_regions = _g1h->max_regions(); 2910 assert(_max_worker_id > 0, "uninitialized"); 2911 2912 for (uint i = 0; i < _max_worker_id; i += 1) { 2913 BitMap* task_card_bm = count_card_bitmap_for(i); 2914 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2915 2916 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2917 assert(marked_bytes_array != NULL, "uninitialized"); 2918 2919 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2920 task_card_bm->clear(); 2921 } 2922 } 2923 2924 void ConcurrentMark::print_stats() { 2925 if (verbose_stats()) { 2926 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2927 for (size_t i = 0; i < _active_tasks; ++i) { 2928 _tasks[i]->print_stats(); 2929 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2930 } 2931 } 2932 } 2933 2934 // abandon current marking iteration due to a Full GC 2935 void ConcurrentMark::abort() { 2936 if (!cmThread()->during_cycle() || _has_aborted) { 2937 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2938 return; 2939 } 2940 2941 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2942 // concurrent bitmap clearing. 2943 _nextMarkBitMap->clearAll(); 2944 2945 // Note we cannot clear the previous marking bitmap here 2946 // since VerifyDuringGC verifies the objects marked during 2947 // a full GC against the previous bitmap. 2948 2949 // Clear the liveness counting data 2950 clear_all_count_data(); 2951 // Empty mark stack 2952 reset_marking_state(); 2953 for (uint i = 0; i < _max_worker_id; ++i) { 2954 _tasks[i]->clear_region_fields(); 2955 } 2956 _first_overflow_barrier_sync.abort(); 2957 _second_overflow_barrier_sync.abort(); 2958 _has_aborted = true; 2959 2960 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2961 satb_mq_set.abandon_partial_marking(); 2962 // This can be called either during or outside marking, we'll read 2963 // the expected_active value from the SATB queue set. 2964 satb_mq_set.set_active_all_threads( 2965 false, /* new active value */ 2966 satb_mq_set.is_active() /* expected_active */); 2967 2968 _g1h->trace_heap_after_concurrent_cycle(); 2969 _g1h->register_concurrent_cycle_end(); 2970 } 2971 2972 static void print_ms_time_info(const char* prefix, const char* name, 2973 NumberSeq& ns) { 2974 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2975 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2976 if (ns.num() > 0) { 2977 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2978 prefix, ns.sd(), ns.maximum()); 2979 } 2980 } 2981 2982 void ConcurrentMark::print_summary_info() { 2983 gclog_or_tty->print_cr(" Concurrent marking:"); 2984 print_ms_time_info(" ", "init marks", _init_times); 2985 print_ms_time_info(" ", "remarks", _remark_times); 2986 { 2987 print_ms_time_info(" ", "final marks", _remark_mark_times); 2988 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2989 2990 } 2991 print_ms_time_info(" ", "cleanups", _cleanup_times); 2992 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2993 _total_counting_time, 2994 (_cleanup_times.num() > 0 ? 
_total_counting_time * 1000.0 / 2995 (double)_cleanup_times.num() 2996 : 0.0)); 2997 if (G1ScrubRemSets) { 2998 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2999 _total_rs_scrub_time, 3000 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3001 (double)_cleanup_times.num() 3002 : 0.0)); 3003 } 3004 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3005 (_init_times.sum() + _remark_times.sum() + 3006 _cleanup_times.sum())/1000.0); 3007 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3008 "(%8.2f s marking).", 3009 cmThread()->vtime_accum(), 3010 cmThread()->vtime_mark_accum()); 3011 } 3012 3013 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3014 _parallel_workers->print_worker_threads_on(st); 3015 } 3016 3017 void ConcurrentMark::print_on_error(outputStream* st) const { 3018 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3019 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3020 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3021 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3022 } 3023 3024 // We take a break if someone is trying to stop the world. 3025 bool ConcurrentMark::do_yield_check(uint worker_id) { 3026 if (SuspendibleThreadSet::should_yield()) { 3027 if (worker_id == 0) { 3028 _g1h->g1_policy()->record_concurrent_pause(); 3029 } 3030 SuspendibleThreadSet::yield(); 3031 return true; 3032 } else { 3033 return false; 3034 } 3035 } 3036 3037 #ifndef PRODUCT 3038 // for debugging purposes 3039 void ConcurrentMark::print_finger() { 3040 gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT, 3041 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3042 for (uint i = 0; i < _max_worker_id; ++i) { 3043 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3044 } 3045 gclog_or_tty->cr(); 3046 } 3047 #endif 3048 3049 // Closure for iteration over bitmaps 3050 class CMBitMapClosure : public BitMapClosure { 3051 private: 3052 // the bitmap that is being iterated over 3053 CMBitMap* _nextMarkBitMap; 3054 ConcurrentMark* _cm; 3055 CMTask* _task; 3056 3057 public: 3058 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3059 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3060 3061 bool do_bit(size_t offset) { 3062 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3063 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3064 assert( addr < _cm->finger(), "invariant"); 3065 assert(addr >= _task->finger(), "invariant"); 3066 3067 // We move that task's local finger along. 
3068 _task->move_finger_to(addr); 3069 3070 _task->scan_object(oop(addr)); 3071 // we only partially drain the local queue and global stack 3072 _task->drain_local_queue(true); 3073 _task->drain_global_stack(true); 3074 3075 // if the has_aborted flag has been raised, we need to bail out of 3076 // the iteration 3077 return !_task->has_aborted(); 3078 } 3079 }; 3080 3081 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 3082 ReferenceProcessor* result = NULL; 3083 if (G1UseConcMarkReferenceProcessing) { 3084 result = g1h->ref_processor_cm(); 3085 assert(result != NULL, "should not be NULL"); 3086 } 3087 return result; 3088 } 3089 3090 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3091 ConcurrentMark* cm, 3092 CMTask* task) 3093 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 3094 _g1h(g1h), _cm(cm), _task(task) 3095 { } 3096 3097 void CMTask::setup_for_region(HeapRegion* hr) { 3098 assert(hr != NULL, 3099 "claim_region() should have filtered out NULL regions"); 3100 assert(!hr->is_continues_humongous(), 3101 "claim_region() should have filtered out continues humongous regions"); 3102 3103 if (_cm->verbose_low()) { 3104 gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT, 3105 _worker_id, p2i(hr)); 3106 } 3107 3108 _curr_region = hr; 3109 _finger = hr->bottom(); 3110 update_region_limit(); 3111 } 3112 3113 void CMTask::update_region_limit() { 3114 HeapRegion* hr = _curr_region; 3115 HeapWord* bottom = hr->bottom(); 3116 HeapWord* limit = hr->next_top_at_mark_start(); 3117 3118 if (limit == bottom) { 3119 if (_cm->verbose_low()) { 3120 gclog_or_tty->print_cr("[%u] found an empty region " 3121 "[" PTR_FORMAT ", " PTR_FORMAT ")", 3122 _worker_id, p2i(bottom), p2i(limit)); 3123 } 3124 // The region was collected underneath our feet. 3125 // We set the finger to bottom to ensure that the bitmap 3126 // iteration that will follow this will not do anything. 3127 // (this is not a condition that holds when we set the region up, 3128 // as the region is not supposed to be empty in the first place) 3129 _finger = bottom; 3130 } else if (limit >= _region_limit) { 3131 assert(limit >= _finger, "peace of mind"); 3132 } else { 3133 assert(limit < _region_limit, "only way to get here"); 3134 // This can happen under some pretty unusual circumstances. An 3135 // evacuation pause empties the region underneath our feet (NTAMS 3136 // at bottom). We then do some allocation in the region (NTAMS 3137 // stays at bottom), followed by the region being used as a GC 3138 // alloc region (NTAMS will move to top() and the objects 3139 // originally below it will be grayed). All objects now marked in 3140 // the region are explicitly grayed, if below the global finger, 3141 // and we do not need in fact to scan anything else. So, we simply 3142 // set _finger to be limit to ensure that the bitmap iteration 3143 // doesn't do anything. 3144 _finger = limit; 3145 } 3146 3147 _region_limit = limit; 3148 } 3149 3150 void CMTask::giveup_current_region() { 3151 assert(_curr_region != NULL, "invariant"); 3152 if (_cm->verbose_low()) { 3153 gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT, 3154 _worker_id, p2i(_curr_region)); 3155 } 3156 clear_region_fields(); 3157 } 3158 3159 void CMTask::clear_region_fields() { 3160 // Values for these three fields that indicate that we're not 3161 // holding on to a region. 
  _curr_region  = NULL;
  _finger       = NULL;
  _region_limit = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following:

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    return;
  }

  // (4) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    return;
  }

  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
                             _worker_id);
    }
    // we do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanned cost (e.g. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
                               _worker_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
                               _worker_id, n);
      }
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
                             _worker_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
                             _worker_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
                               p2i((void*) obj));
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
                             _worker_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
                             _worker_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
                             _worker_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue has several assumptions about whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
    }
    regular_clock_call();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
                         _worker_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());
}

bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
  return _task_queues->steal(worker_id, hash_seed, obj);
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

    (1) Marking Bitmap. If there are gray objects that appear only
    on the bitmap (this happens either when dealing with an overflow
    or when the initial marking phase has simply marked the roots
    and has not pushed them on the stack), then tasks claim heap
    regions whose bitmap they then scan to find gray objects. A
    global finger indicates where the end of the last claimed region
    is. A local finger indicates how far into the region a task has
    scanned. The two fingers are used to determine how to gray an
    object (i.e. whether simply marking it is OK, as it will be
    visited by a task in the future, or whether it also needs to be
    pushed on a stack).

    (2) Local Queue. The local queue of the task which is accessed
    reasonably efficiently by the task. Other tasks can steal from
    it when they run out of work. Throughout the marking phase, a
    task attempts to keep its local queue short but not totally
    empty, so that entries are available for stealing by other
    tasks. Only when there is no more work does a task totally
    drain its local queue.
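
    (Illustrative note, not a normative statement of the algorithm:
    the "short but not totally empty" policy corresponds to the
    partial drain in drain_local_queue(true) above, which stops once
    the queue is down to MIN2(max_elems / 3, GCDrainStackTargetSize)
    entries, leaving some entries available for stealing.)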

    (3) Global Mark Stack. This handles local queue overflow. During
    marking only sets of entries are moved between it and the local
    queues, as access to it requires a mutex and finer-grained
    interaction with it might cause contention. If it
    overflows, then the marking phase should restart and iterate
    over the bitmap to identify gray objects. Throughout the marking
    phase, tasks attempt to keep the global mark stack at a small
    length but not totally empty, so that entries are available for
    popping by other tasks. Only when there is no more work do tasks
    totally drain the global mark stack.

    (4) SATB Buffer Queue. This is where completed SATB buffers are
    made available. Buffers are regularly removed from this queue
    and scanned for roots, so that the queue doesn't get too
    long. During remark, all completed buffers are processed, as
    well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (on the global stack) has been
    triggered. Before the task aborts, it will actually sync up with
    the other tasks to ensure that all the marking data structures
    (local queues, stacks, fingers etc.) are re-initialized so that
    when do_marking_step() completes, the marking phase can
    immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and do the yield
    right at the end of do_marking_step(). This saves us from a lot
    of hassle as, by yielding, we might allow a Full GC. If this
    happens then objects will be compacted underneath our feet, the
    heap might shrink, etc. We save checking for this by just
    aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reaches a given limit. Additional calls to the
    clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.
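
    As an illustration only (a minimal sketch, not the exact driver
    code; "task" and "cm" stand for the current CMTask and the
    ConcurrentMark instance), a concurrent caller of do_marking_step()
    is expected to look roughly like:

      do {
        task->do_marking_step(G1ConcMarkStepDurationMillis,
                              true  /* do_termination */,
                              false /* is_serial */);
        // ... yield to a pending safepoint / pause here if needed ...
      } while (!cm->has_aborted() && task->has_aborted());

    That is, the step is simply re-invoked for as long as it keeps
    aborting, unless marking as a whole has been aborted (e.g. by a
    Full GC).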

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _worker_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // on the stack.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] we're scanning part "
                               "[" PTR_FORMAT ", " PTR_FORMAT ") "
                               "of region " HR_FORMAT,
                               _worker_id, p2i(_finger), p2i(_region_limit),
                               HR_FORMAT_PARAMS(_curr_region));
      }

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around the loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object.
        // It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method about why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%u] we successfully claimed "
                                 "region " PTR_FORMAT,
                                 _worker_id, p2i(claimed_region));
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once around the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
    }

    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
                                 _worker_id, p2i((void*) obj));
        }

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task.
      // It will be restarted and we can hopefully find more things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] apparently there is more work to do",
                               _worker_id);
      }

      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was stack-allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
      }

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
                               _worker_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff",
                 "remset", "code-roots");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                 "(bytes)", "(bytes)");
}

// It takes as a parameter a pointer to one of the _hum_* fields,
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, p2i(bottom), p2i(end),
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                 remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // add static memory usage to the remembered set sizes
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}
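
// Illustrative usage sketch (an assumption about the caller, not part
// of this file's control flow): the closure above is designed to be
// driven by a full heap-region iteration, along the lines of
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the header, doHeapRegion() prints one line
// per region, and the destructor prints the summary footer.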