/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
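  // The bitmap has one bit per (1 << _shifter) heap words, so the first
  // address that can have its own bit is aligned to
  // HeapWordSize << _shifter bytes.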
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration.
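  // (clearAll() below passes may_yield == false for the STW path, while the
  // concurrent clearNextBitmap() passes true.)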
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we overflowed the marking stack while marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity; continue with the existing stack.
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
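  // There is room: publish the new index while still holding the lock,
  // then copy the n entries into the claimed slots.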
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
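  // If the scan was aborted, claim_next() stopped handing out survivors,
  // so full consumption can only be asserted on the normal path below.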
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set.
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal.
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor          = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
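    // scale_parallel_threads() maps n parallel GC threads to
    // MAX2((n + 2) / 4, 1U), i.e. roughly one marking thread per four
    // GC threads.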
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
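    // Only user-specified values are checked here; the ergonomically
    // computed value in the branch above was already validated.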
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // We need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed.
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // We can use the region's orig_end() for the limit and the comparison value
    // as it always contains the "real" end of the region that never changes and
    // has no side effects.
    // Due to the latter, there can also be no problem with the compiler generating
    // reloads of the orig_end() call.
    HeapWord* end = r->orig_end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->is_continues_humongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards.
 * If we do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other threads to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, a
 * Full GC or an evacuation pause could suspend it. This is actually
 * safe, since entering the sync barrier is one of the last things
 * do_marking_step() does, and it doesn't manipulate any data
 * structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
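      // Here we are on the concurrent path, so clearing the flag is what
      // allows marking to restart cleanly.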
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp();
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  bool barrier_aborted;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_second_overflow_barrier_sync.enter();
  }

  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
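// The result honors UseDynamicNumberOfGCThreads and never exceeds
// max_parallel_marking_threads().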
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public StackObj {
  GCTraceTimeImpl _gc_trace_time;
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->is_continues_humongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->is_starts_humongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
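// It walks the next marking bitmap of each region and rebuilds the
// expected region and card liveness bitmaps that verification then
// compares against the actual ones.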
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
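    // A region with any marked bytes below ntams is live even if nothing
    // was allocated in it since marking started.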
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm; // Region BM to be verified
  BitMap* _card_bm;   // Card BM to be verified
  bool _verbose;      // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrm_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrm_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
1642 // Sets the bits corresponding to the interval [NTAMS, top)
1643 // (which contains the implicitly live objects) in the
1644 // card liveness bitmap. Also sets the bit, in the region
1645 // liveness bitmap, for each region containing live data.
1646 
1647 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1648  public:
1649   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1650                               BitMap* region_bm,
1651                               BitMap* card_bm) :
1652     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1653 
1654   bool doHeapRegion(HeapRegion* hr) {
1655 
1656     if (hr->is_continues_humongous()) {
1657       // We will ignore these here and process them when their
1658       // associated "starts humongous" region is processed (see
1659       // set_bit_for_region()). Note that we cannot rely on their
1660       // associated "starts humongous" region to have their bit set to
1661       // 1 since, due to the region chunking in the parallel region
1662       // iteration, a "continues humongous" region might be visited
1663       // before its associated "starts humongous".
1664       return false;
1665     }
1666 
1667     HeapWord* ntams = hr->next_top_at_mark_start();
1668     HeapWord* top = hr->top();
1669 
1670     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1671 
1672     // Mark the allocated-since-marking portion...
1673     if (ntams < top) {
1674       // This definitely means the region has live objects.
1675       set_bit_for_region(hr);
1676 
1677       // Now set the bits in the card bitmap for [ntams, top)
1678       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1679       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1680 
1681       // Note: if we're looking at the last region in the heap, top
1682       // could actually be just beyond the end of the heap; end_idx
1683       // will then correspond to a (non-existent) card that is also
1684       // just beyond the heap.
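      // As a rough sketch of the mapping assumed here (the exact code is in
      // ConcurrentMark::card_bitmap_index_for()), a heap address maps to a
      // card index along the lines of
      //
      //   idx = (addr - heap_base) >> CardTableModRefBS::card_shift;
      //
      // where heap_base stands for the start of the reserved heap. Since
      // end_idx is obtained by flooring 'top', an object ending at a
      // non-card-aligned address would lose its final, partial card; the
      // increment below compensates for that.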
1685       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1686         // end of object is not card aligned - increment to cover
1687         // all the cards spanned by the object
1688         end_idx += 1;
1689       }
1690 
1691       assert(end_idx <= _card_bm->size(),
1692              "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1693              end_idx, _card_bm->size());
1694       assert(start_idx < _card_bm->size(),
1695              "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1696              start_idx, _card_bm->size());
1697 
1698       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1699     }
1700 
1701     // Set the bit for the region if it contains live data
1702     if (hr->next_marked_bytes() > 0) {
1703       set_bit_for_region(hr);
1704     }
1705 
1706     return false;
1707   }
1708 };
1709 
1710 class G1ParFinalCountTask: public AbstractGangTask {
1711  protected:
1712   G1CollectedHeap* _g1h;
1713   ConcurrentMark* _cm;
1714   BitMap* _actual_region_bm;
1715   BitMap* _actual_card_bm;
1716 
1717   uint _n_workers;
1718   HeapRegionClaimer _hrclaimer;
1719 
1720  public:
1721   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1722     : AbstractGangTask("G1 final counting"),
1723       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1724       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1725       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1726   }
1727 
1728   void work(uint worker_id) {
1729     assert(worker_id < _n_workers, "invariant");
1730 
1731     FinalCountDataUpdateClosure final_update_cl(_g1h,
1732                                                 _actual_region_bm,
1733                                                 _actual_card_bm);
1734 
1735     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1736   }
1737 };
1738 
1739 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1740   G1CollectedHeap* _g1;
1741   size_t _freed_bytes;
1742   FreeRegionList* _local_cleanup_list;
1743   HeapRegionSetCount _old_regions_removed;
1744   HeapRegionSetCount _humongous_regions_removed;
1745   HRRSCleanupTask* _hrrs_cleanup_task;
1746 
1747  public:
1748   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1749                              FreeRegionList* local_cleanup_list,
1750                              HRRSCleanupTask* hrrs_cleanup_task) :
1751     _g1(g1),
1752     _freed_bytes(0),
1753     _local_cleanup_list(local_cleanup_list),
1754     _old_regions_removed(),
1755     _humongous_regions_removed(),
1756     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1757 
1758   size_t freed_bytes() { return _freed_bytes; }
1759   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1760   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1761 
1762   bool doHeapRegion(HeapRegion *hr) {
1763     if (hr->is_continues_humongous() || hr->is_archive()) {
1764       return false;
1765     }
1766     // Regions are claimed for iteration via the HeapRegionClaimer passed
1767     // to heap_region_par_iterate(), so no explicit claim values are involved here.
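    // The reclamation decision below can be read as a predicate sketch:
    // a region is freed at this point only if it has allocated space but
    // no live data and is not young, roughly
    //
    //   hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()
    //
    // with humongous objects freed via their "starts humongous" region;
    // any region that is kept instead has remembered set cleanup work
    // queued for it.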
1768 _g1->reset_gc_time_stamps(hr); 1769 hr->note_end_of_marking(); 1770 1771 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1772 _freed_bytes += hr->used(); 1773 hr->set_containing_set(NULL); 1774 if (hr->is_humongous()) { 1775 assert(hr->is_starts_humongous(), "we should only see starts humongous"); 1776 _humongous_regions_removed.increment(1u, hr->capacity()); 1777 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1778 } else { 1779 _old_regions_removed.increment(1u, hr->capacity()); 1780 _g1->free_region(hr, _local_cleanup_list, true); 1781 } 1782 } else { 1783 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1784 } 1785 1786 return false; 1787 } 1788 }; 1789 1790 class G1ParNoteEndTask: public AbstractGangTask { 1791 friend class G1NoteEndOfConcMarkClosure; 1792 1793 protected: 1794 G1CollectedHeap* _g1h; 1795 FreeRegionList* _cleanup_list; 1796 HeapRegionClaimer _hrclaimer; 1797 1798 public: 1799 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1800 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1801 } 1802 1803 void work(uint worker_id) { 1804 FreeRegionList local_cleanup_list("Local Cleanup List"); 1805 HRRSCleanupTask hrrs_cleanup_task; 1806 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1807 &hrrs_cleanup_task); 1808 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1809 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1810 1811 // Now update the lists 1812 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1813 { 1814 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1815 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1816 1817 // If we iterate over the global cleanup list at the end of 1818 // cleanup to do this printing we will not guarantee to only 1819 // generate output for the newly-reclaimed regions (the list 1820 // might not be empty at the beginning of cleanup; we might 1821 // still be working on its previous contents). So we do the 1822 // printing here, before we append the new regions to the global 1823 // cleanup list. 
1824 1825 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1826 if (hr_printer->is_active()) { 1827 FreeRegionListIterator iter(&local_cleanup_list); 1828 while (iter.more_available()) { 1829 HeapRegion* hr = iter.get_next(); 1830 hr_printer->cleanup(hr); 1831 } 1832 } 1833 1834 _cleanup_list->add_ordered(&local_cleanup_list); 1835 assert(local_cleanup_list.is_empty(), "post-condition"); 1836 1837 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1838 } 1839 } 1840 }; 1841 1842 class G1ParScrubRemSetTask: public AbstractGangTask { 1843 protected: 1844 G1RemSet* _g1rs; 1845 BitMap* _region_bm; 1846 BitMap* _card_bm; 1847 HeapRegionClaimer _hrclaimer; 1848 1849 public: 1850 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1851 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1852 } 1853 1854 void work(uint worker_id) { 1855 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1856 } 1857 1858 }; 1859 1860 void ConcurrentMark::cleanup() { 1861 // world is stopped at this checkpoint 1862 assert(SafepointSynchronize::is_at_safepoint(), 1863 "world should be stopped"); 1864 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1865 1866 // If a full collection has happened, we shouldn't do this. 1867 if (has_aborted()) { 1868 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1869 return; 1870 } 1871 1872 g1h->verify_region_sets_optional(); 1873 1874 if (VerifyDuringGC) { 1875 HandleMark hm; // handle scope 1876 g1h->prepare_for_verify(); 1877 Universe::verify(VerifyOption_G1UsePrevMarking, 1878 " VerifyDuringGC:(before)"); 1879 } 1880 g1h->check_bitmaps("Cleanup Start"); 1881 1882 G1CollectorPolicy* g1p = g1h->g1_policy(); 1883 g1p->record_concurrent_mark_cleanup_start(); 1884 1885 double start = os::elapsedTime(); 1886 1887 HeapRegionRemSet::reset_for_cleanup_tasks(); 1888 1889 // Do counting once more with the world stopped for good measure. 1890 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1891 1892 g1h->workers()->run_task(&g1_par_count_task); 1893 1894 if (VerifyDuringGC) { 1895 // Verify that the counting data accumulated during marking matches 1896 // that calculated by walking the marking bitmap. 1897 1898 // Bitmaps to hold expected values 1899 BitMap expected_region_bm(_region_bm.size(), true); 1900 BitMap expected_card_bm(_card_bm.size(), true); 1901 1902 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1903 &_region_bm, 1904 &_card_bm, 1905 &expected_region_bm, 1906 &expected_card_bm); 1907 1908 g1h->workers()->run_task(&g1_par_verify_task); 1909 1910 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1911 } 1912 1913 size_t start_used_bytes = g1h->used(); 1914 g1h->collector_state()->set_mark_in_progress(false); 1915 1916 double count_end = os::elapsedTime(); 1917 double this_final_counting_time = (count_end - start); 1918 _total_counting_time += this_final_counting_time; 1919 1920 if (G1PrintRegionLivenessInfo) { 1921 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1922 _g1h->heap_region_iterate(&cl); 1923 } 1924 1925 // Install newly created mark bitMap as "prev". 1926 swapMarkBitMaps(); 1927 1928 g1h->reset_gc_time_stamp(); 1929 1930 uint n_workers = _g1h->workers()->active_workers(); 1931 1932 // Note end of marking in all heap regions. 
1933   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1934   g1h->workers()->run_task(&g1_par_note_end_task);
1935   g1h->check_gc_time_stamps();
1936 
1937   if (!cleanup_list_is_empty()) {
1938     // The cleanup list is not empty, so we'll have to process it
1939     // concurrently. Notify anyone else that might be wanting free
1940     // regions that there will be more free regions coming soon.
1941     g1h->set_free_regions_coming();
1942   }
1943 
1944   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1945   // call below, since it affects the metric by which we sort the heap regions.
1946   if (G1ScrubRemSets) {
1947     double rs_scrub_start = os::elapsedTime();
1948     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1949     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1950 
1951     double rs_scrub_end = os::elapsedTime();
1952     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1953     _total_rs_scrub_time += this_rs_scrub_time;
1954   }
1955 
1956   // this will also free any regions totally full of garbage objects,
1957   // and sort the regions.
1958   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1959 
1960   // Statistics.
1961   double end = os::elapsedTime();
1962   _cleanup_times.add((end - start) * 1000.0);
1963 
1964   if (G1Log::fine()) {
1965     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1966   }
1967 
1968   // Cleanup will have freed any regions completely full of garbage.
1969   // Update the soft reference policy with the new heap occupancy.
1970   Universe::update_heap_info_at_gc();
1971 
1972   if (VerifyDuringGC) {
1973     HandleMark hm;  // handle scope
1974     g1h->prepare_for_verify();
1975     Universe::verify(VerifyOption_G1UsePrevMarking,
1976                      " VerifyDuringGC:(after)");
1977   }
1978 
1979   g1h->check_bitmaps("Cleanup End");
1980 
1981   g1h->verify_region_sets_optional();
1982 
1983   // We need to make this be a "collection" so any collection pause that
1984   // races with it goes around and waits for completeCleanup to finish.
1985   g1h->increment_total_collections();
1986 
1987   // Clean out dead classes and update Metaspace sizes.
1988   if (ClassUnloadingWithConcurrentMark) {
1989     ClassLoaderDataGraph::purge();
1990   }
1991   MetaspaceGC::compute_new_size();
1992 
1993   // We reclaimed old regions so we should calculate the sizes to make
1994   // sure we update the old gen/space data.
1995   g1h->g1mm()->update_sizes();
1996   g1h->allocation_context_stats().update_after_mark();
1997 
1998   g1h->trace_heap_after_concurrent_cycle();
1999 }
2000 
2001 void ConcurrentMark::completeCleanup() {
2002   if (has_aborted()) return;
2003 
2004   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2005 
2006   _cleanup_list.verify_optional();
2007   FreeRegionList tmp_free_list("Tmp Free List");
2008 
2009   if (G1ConcRegionFreeingVerbose) {
2010     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2011                            "cleanup list has %u entries",
2012                            _cleanup_list.length());
2013   }
2014 
2015   // No one else should be accessing the _cleanup_list at this point,
2016   // so it is not necessary to take any locks.
2017   while (!_cleanup_list.is_empty()) {
2018     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2019     assert(hr != NULL, "Got NULL from a non-empty list");
2020     hr->par_clear();
2021     tmp_free_list.add_ordered(hr);
2022 
2023     // Instead of adding one region at a time to the secondary_free_list,
2024     // we accumulate them in the local list and move them a few at a
2025     // time. This also cuts down on the number of notify_all() calls
2026     // we do during this process. We'll also append the local list when
2027     // _cleanup_list is empty (which means we just removed the last
2028     // region from the _cleanup_list).
2029     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2030         _cleanup_list.is_empty()) {
2031       if (G1ConcRegionFreeingVerbose) {
2032         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2033                                "appending %u entries to the secondary_free_list, "
2034                                "cleanup list still has %u entries",
2035                                tmp_free_list.length(),
2036                                _cleanup_list.length());
2037       }
2038 
2039       {
2040         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2041         g1h->secondary_free_list_add(&tmp_free_list);
2042         SecondaryFreeList_lock->notify_all();
2043       }
2044 #ifndef PRODUCT
2045       if (G1StressConcRegionFreeing) {
2046         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2047           os::sleep(Thread::current(), (jlong) 1, false);
2048         }
2049       }
2050 #endif
2051     }
2052   }
2053   assert(tmp_free_list.is_empty(), "post-condition");
2054 }
2055 
2056 // Supporting Object and Oop closures for reference discovery
2057 // and processing during marking
2058 
2059 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2060   HeapWord* addr = (HeapWord*)obj;
2061   return addr != NULL &&
2062          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2063 }
2064 
2065 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2066 // Uses the CMTask associated with a worker thread (for serial reference
2067 // processing the CMTask for worker 0 is used) to preserve (mark) and
2068 // trace referent objects.
2069 //
2070 // Using the CMTask and embedded local queues avoids having the worker
2071 // threads operating on the global mark stack. This reduces the risk
2072 // of overflowing the stack - which we would rather avoid at this late
2073 // stage. Also using the tasks' local queues removes the potential
2074 // of the workers interfering with each other that could occur if
2075 // operating on the global stack.
2076 
2077 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2078   ConcurrentMark* _cm;
2079   CMTask* _task;
2080   int _ref_counter_limit;
2081   int _ref_counter;
2082   bool _is_serial;
2083  public:
2084   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2085     _cm(cm), _task(task), _is_serial(is_serial),
2086     _ref_counter_limit(G1RefProcDrainInterval) {
2087     assert(_ref_counter_limit > 0, "sanity");
2088     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2089     _ref_counter = _ref_counter_limit;
2090   }
2091 
2092   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2093   virtual void do_oop(      oop* p) { do_oop_work(p); }
2094 
2095   template <class T> void do_oop_work(T* p) {
2096     if (!_cm->has_overflown()) {
2097       oop obj = oopDesc::load_decode_heap_oop(p);
2098       if (_cm->verbose_high()) {
2099         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2100                                "*" PTR_FORMAT " = " PTR_FORMAT,
2101                                _task->worker_id(), p2i(p), p2i((void*) obj));
2102       }
2103 
2104       _task->deal_with_reference(obj);
2105       _ref_counter--;
2106 
2107       if (_ref_counter == 0) {
2108         // We have dealt with _ref_counter_limit references, pushing them
2109         // and objects reachable from them on to the local stack (and
2110         // possibly the global stack). Call CMTask::do_marking_step() to
2111         // process these entries.
2112         //
2113         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2114         // there's nothing more to do (i.e. we're done with the entries that
2115         // were pushed as a result of the CMTask::deal_with_reference() calls
2116         // above) or we overflow.
2117         //
2118         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2119         // flag while there may still be some work to do. (See the comment at
2120         // the beginning of CMTask::do_marking_step() for those conditions -
2121         // one of which is reaching the specified time target.) It is only
2122         // when CMTask::do_marking_step() returns without setting the
2123         // has_aborted() flag that the marking step has completed.
2124         do {
2125           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2126           _task->do_marking_step(mark_step_duration_ms,
2127                                  false /* do_termination */,
2128                                  _is_serial);
2129         } while (_task->has_aborted() && !_cm->has_overflown());
2130         _ref_counter = _ref_counter_limit;
2131       }
2132     } else {
2133       if (_cm->verbose_high()) {
2134         gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2135       }
2136     }
2137   }
2138 };
2139 
2140 // 'Drain' oop closure used by both serial and parallel reference processing.
2141 // Uses the CMTask associated with a given worker thread (for serial
2142 // reference processing the CMTask for worker 0 is used). Calls the
2143 // do_marking_step routine, with an unbelievably large timeout value,
2144 // to drain the marking data structures of the remaining entries
2145 // added by the 'keep alive' oop closure above.
2146 
2147 class G1CMDrainMarkingStackClosure: public VoidClosure {
2148   ConcurrentMark* _cm;
2149   CMTask* _task;
2150   bool _is_serial;
2151  public:
2152   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2153     _cm(cm), _task(task), _is_serial(is_serial) {
2154     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2155   }
2156 
2157   void do_void() {
2158     do {
2159       if (_cm->verbose_high()) {
2160         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2161                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2162       }
2163 
2164       // We call CMTask::do_marking_step() to completely drain the local
2165       // and global marking stacks of entries pushed by the 'keep alive'
2166       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2167       //
2168       // CMTask::do_marking_step() is called in a loop, which we'll exit
2169       // if there's nothing more to do (i.e. we've completely drained the
2170       // entries that were pushed as a result of applying the 'keep alive'
2171       // closure to the entries on the discovered ref lists) or we overflow
2172       // the global marking stack.
2173       //
2174       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2175       // flag while there may still be some work to do. (See the comment at
2176       // the beginning of CMTask::do_marking_step() for those conditions -
2177       // one of which is reaching the specified time target.) It is only
2178       // when CMTask::do_marking_step() returns without setting the
2179       // has_aborted() flag that the marking step has completed.
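      // The time target passed below is effectively unbounded, so the only
      // ways out of this loop are a fully drained set of marking data
      // structures or an overflow of the global mark stack; unlike in the
      // 'keep alive' closure above, the time target itself never causes
      // an early return here.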
2180 2181 _task->do_marking_step(1000000000.0 /* something very large */, 2182 true /* do_termination */, 2183 _is_serial); 2184 } while (_task->has_aborted() && !_cm->has_overflown()); 2185 } 2186 }; 2187 2188 // Implementation of AbstractRefProcTaskExecutor for parallel 2189 // reference processing at the end of G1 concurrent marking 2190 2191 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2192 private: 2193 G1CollectedHeap* _g1h; 2194 ConcurrentMark* _cm; 2195 WorkGang* _workers; 2196 uint _active_workers; 2197 2198 public: 2199 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2200 ConcurrentMark* cm, 2201 WorkGang* workers, 2202 uint n_workers) : 2203 _g1h(g1h), _cm(cm), 2204 _workers(workers), _active_workers(n_workers) { } 2205 2206 // Executes the given task using concurrent marking worker threads. 2207 virtual void execute(ProcessTask& task); 2208 virtual void execute(EnqueueTask& task); 2209 }; 2210 2211 class G1CMRefProcTaskProxy: public AbstractGangTask { 2212 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2213 ProcessTask& _proc_task; 2214 G1CollectedHeap* _g1h; 2215 ConcurrentMark* _cm; 2216 2217 public: 2218 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2219 G1CollectedHeap* g1h, 2220 ConcurrentMark* cm) : 2221 AbstractGangTask("Process reference objects in parallel"), 2222 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2223 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2224 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2225 } 2226 2227 virtual void work(uint worker_id) { 2228 ResourceMark rm; 2229 HandleMark hm; 2230 CMTask* task = _cm->task(worker_id); 2231 G1CMIsAliveClosure g1_is_alive(_g1h); 2232 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2233 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2234 2235 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2236 } 2237 }; 2238 2239 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2240 assert(_workers != NULL, "Need parallel worker threads."); 2241 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2242 2243 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2244 2245 // We need to reset the concurrency level before each 2246 // proxy task execution, so that the termination protocol 2247 // and overflow handling in CMTask::do_marking_step() knows 2248 // how many workers to wait for. 2249 _cm->set_concurrency(_active_workers); 2250 _workers->run_task(&proc_task_proxy); 2251 } 2252 2253 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2254 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2255 EnqueueTask& _enq_task; 2256 2257 public: 2258 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2259 AbstractGangTask("Enqueue reference objects in parallel"), 2260 _enq_task(enq_task) { } 2261 2262 virtual void work(uint worker_id) { 2263 _enq_task.work(worker_id); 2264 } 2265 }; 2266 2267 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2268 assert(_workers != NULL, "Need parallel worker threads."); 2269 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2270 2271 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2272 2273 // Not strictly necessary but... 2274 // 2275 // We need to reset the concurrency level before each 2276 // proxy task execution, so that the termination protocol 2277 // and overflow handling in CMTask::do_marking_step() knows 2278 // how many workers to wait for. 
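  // (A note on the "not strictly necessary" above: the enqueue proxy task
  // only calls EnqueueTask::work() and never enters
  // CMTask::do_marking_step(), so no termination protocol runs here; the
  // concurrency level is reset anyway so that it always mirrors the number
  // of workers about to be run.)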
2279   _cm->set_concurrency(_active_workers);
2280   _workers->run_task(&enq_task_proxy);
2281 }
2282 
2283 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2284   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2285 }
2286 
2287 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2288   if (has_overflown()) {
2289     // Skip processing the discovered references if we have
2290     // overflown the global marking stack. Reference objects
2291     // only get discovered once so it is OK to not
2292     // de-populate the discovered reference lists. We could have done so,
2293     // but the only benefit would be that, when marking restarts,
2294     // fewer reference objects are discovered.
2295     return;
2296   }
2297 
2298   ResourceMark rm;
2299   HandleMark hm;
2300 
2301   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2302 
2303   // Is alive closure.
2304   G1CMIsAliveClosure g1_is_alive(g1h);
2305 
2306   // Inner scope to exclude the cleaning of the string and symbol
2307   // tables from the displayed time.
2308   {
2309     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2310 
2311     ReferenceProcessor* rp = g1h->ref_processor_cm();
2312 
2313     // See the comment in G1CollectedHeap::ref_processing_init()
2314     // about how reference processing currently works in G1.
2315 
2316     // Set the soft reference policy
2317     rp->setup_policy(clear_all_soft_refs);
2318     assert(_markStack.isEmpty(), "mark stack should be empty");
2319 
2320     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2321     // in serial reference processing. Note these closures are also
2322     // used for serially processing (by the current thread) the
2323     // JNI references during parallel reference processing.
2324     //
2325     // These closures do not need to synchronize with the worker
2326     // threads involved in parallel reference processing as these
2327     // instances are executed serially by the current thread (i.e.
2328     // reference processing is not multi-threaded and is thus
2329     // performed by the current thread instead of a gang worker).
2330     //
2331     // The gang tasks involved in parallel reference processing create
2332     // their own instances of these closures, which do their own
2333     // synchronization among themselves.
2334     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2335     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2336 
2337     // We need at least one active thread. If reference processing
2338     // is not multi-threaded we use the current (VMThread) thread,
2339     // otherwise we use the work gang from the G1CollectedHeap and
2340     // we utilize all the worker threads we can.
2341     bool processing_is_mt = rp->processing_is_mt();
2342     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2343     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2344 
2345     // Parallel processing task executor.
2346     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2347                                               g1h->workers(), active_workers);
2348     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2349 
2350     // Set the concurrency level. The phase was already set prior to
2351     // executing the remark task.
2352     set_concurrency(active_workers);
2353 
2354     // Set the degree of MT processing here. If the discovery was done MT,
2355     // the number of threads involved during discovery could differ from
2356     // the number of active workers.
This is OK as long as the discovered 2357 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2358 rp->set_active_mt_degree(active_workers); 2359 2360 // Process the weak references. 2361 const ReferenceProcessorStats& stats = 2362 rp->process_discovered_references(&g1_is_alive, 2363 &g1_keep_alive, 2364 &g1_drain_mark_stack, 2365 executor, 2366 g1h->gc_timer_cm()); 2367 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2368 2369 // The do_oop work routines of the keep_alive and drain_marking_stack 2370 // oop closures will set the has_overflown flag if we overflow the 2371 // global marking stack. 2372 2373 assert(_markStack.overflow() || _markStack.isEmpty(), 2374 "mark stack should be empty (unless it overflowed)"); 2375 2376 if (_markStack.overflow()) { 2377 // This should have been done already when we tried to push an 2378 // entry on to the global mark stack. But let's do it again. 2379 set_has_overflown(); 2380 } 2381 2382 assert(rp->num_q() == active_workers, "why not"); 2383 2384 rp->enqueue_discovered_references(executor); 2385 2386 rp->verify_no_references_recorded(); 2387 assert(!rp->discovery_enabled(), "Post condition"); 2388 } 2389 2390 if (has_overflown()) { 2391 // We can not trust g1_is_alive if the marking stack overflowed 2392 return; 2393 } 2394 2395 assert(_markStack.isEmpty(), "Marking should have completed"); 2396 2397 // Unload Klasses, String, Symbols, Code Cache, etc. 2398 { 2399 G1CMTraceTime trace("Unloading", G1Log::finer()); 2400 2401 if (ClassUnloadingWithConcurrentMark) { 2402 bool purged_classes; 2403 2404 { 2405 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2406 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2407 } 2408 2409 { 2410 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2411 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2412 } 2413 } 2414 2415 if (G1StringDedup::is_enabled()) { 2416 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2417 G1StringDedup::unlink(&g1_is_alive); 2418 } 2419 } 2420 } 2421 2422 void ConcurrentMark::swapMarkBitMaps() { 2423 CMBitMapRO* temp = _prevMarkBitMap; 2424 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2425 _nextMarkBitMap = (CMBitMap*) temp; 2426 } 2427 2428 // Closure for marking entries in SATB buffers. 2429 class CMSATBBufferClosure : public SATBBufferClosure { 2430 private: 2431 CMTask* _task; 2432 G1CollectedHeap* _g1h; 2433 2434 // This is very similar to CMTask::deal_with_reference, but with 2435 // more relaxed requirements for the argument, so this must be more 2436 // circumspect about treating the argument as an object. 2437 void do_entry(void* entry) const { 2438 _task->increment_refs_reached(); 2439 HeapRegion* hr = _g1h->heap_region_containing_raw(entry); 2440 if (entry < hr->next_top_at_mark_start()) { 2441 // Until we get here, we don't know whether entry refers to a valid 2442 // object; it could instead have been a stale reference. 
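      // Sketch of the invariant relied on from here on: the entry passed
      // the 'entry < next_top_at_mark_start()' filter above, and anything
      // below NTAMS was allocated before marking started, so it must be a
      // parseable object (possibly unmarked), not a stale pointer into
      // space reused since the buffer was filled.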
2443       oop obj = static_cast<oop>(entry);
2444       assert(obj->is_oop(true /* ignore mark word */),
2445              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2446       _task->make_reference_grey(obj, hr);
2447     }
2448   }
2449 
2450  public:
2451   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2452     : _task(task), _g1h(g1h) { }
2453 
2454   virtual void do_buffer(void** buffer, size_t size) {
2455     for (size_t i = 0; i < size; ++i) {
2456       do_entry(buffer[i]);
2457     }
2458   }
2459 };
2460 
2461 class G1RemarkThreadsClosure : public ThreadClosure {
2462   CMSATBBufferClosure _cm_satb_cl;
2463   G1CMOopClosure _cm_cl;
2464   MarkingCodeBlobClosure _code_cl;
2465   int _thread_parity;
2466 
2467  public:
2468   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2469     _cm_satb_cl(task, g1h),
2470     _cm_cl(g1h, g1h->concurrent_mark(), task),
2471     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2472     _thread_parity(Threads::thread_claim_parity()) {}
2473 
2474   void do_thread(Thread* thread) {
2475     if (thread->is_Java_thread()) {
2476       if (thread->claim_oops_do(true, _thread_parity)) {
2477         JavaThread* jt = (JavaThread*)thread;
2478 
2479         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2480         // however, the oops reachable from nmethods have very complex lifecycles:
2481         // * Alive if on the stack of an executing method
2482         // * Weakly reachable otherwise
2483         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2484         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2485         jt->nmethods_do(&_code_cl);
2486 
2487         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2488       }
2489     } else if (thread->is_VM_thread()) {
2490       if (thread->claim_oops_do(true, _thread_parity)) {
2491         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2492       }
2493     }
2494   }
2495 };
2496 
2497 class CMRemarkTask: public AbstractGangTask {
2498  private:
2499   ConcurrentMark* _cm;
2500  public:
2501   void work(uint worker_id) {
2502     // Since all available tasks are actually started, we should
2503     // only proceed if we're supposed to be active.
2504     if (worker_id < _cm->active_tasks()) {
2505       CMTask* task = _cm->task(worker_id);
2506       task->record_start_time();
2507       {
2508         ResourceMark rm;
2509         HandleMark hm;
2510 
2511         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2512         Threads::threads_do(&threads_f);
2513       }
2514 
2515       do {
2516         task->do_marking_step(1000000000.0 /* something very large */,
2517                               true /* do_termination */,
2518                               false /* is_serial */);
2519       } while (task->has_aborted() && !_cm->has_overflown());
2520       // If we overflow, then we do not want to restart. We instead
2521       // want to abort remark and do concurrent marking again.
2522       task->record_end_time();
2523     }
2524   }
2525 
2526   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2527     AbstractGangTask("Par Remark"), _cm(cm) {
2528     _cm->terminator()->reset_for_reuse(active_workers);
2529   }
2530 };
2531 
2532 void ConcurrentMark::checkpointRootsFinalWork() {
2533   ResourceMark rm;
2534   HandleMark hm;
2535   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2536 
2537   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2538 
2539   g1h->ensure_parsability(false);
2540 
2541   // this is remark, so we'll use up all active threads
2542   uint active_workers = g1h->workers()->active_workers();
2543   set_concurrency_and_phase(active_workers, false /* concurrent */);
2544   // Leave _parallel_marking_threads at its
2545   // value originally calculated in the ConcurrentMark
2546   // constructor and pass values of the active workers
2547   // through the gang in the task.
2548 
2549   {
2550     StrongRootsScope srs(active_workers);
2551 
2552     CMRemarkTask remarkTask(this, active_workers);
2553     // We will start all available threads, even if we decide that the
2554     // active_workers will be fewer. The extra ones will just bail out
2555     // immediately.
2556     g1h->workers()->run_task(&remarkTask);
2557   }
2558 
2559   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2560   guarantee(has_overflown() ||
2561             satb_mq_set.completed_buffers_num() == 0,
2562             "Invariant: has_overflown = %s, num buffers = %d",
2563             BOOL_TO_STR(has_overflown()),
2564             satb_mq_set.completed_buffers_num());
2565 
2566   print_stats();
2567 }
2568 
2569 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2570   // Note we are overriding the read-only view of the prev map here, via
2571   // the cast.
2572   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2573 }
2574 
2575 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2576   _nextMarkBitMap->clearRange(mr);
2577 }
2578 
2579 HeapRegion*
2580 ConcurrentMark::claim_region(uint worker_id) {
2581   // "checkpoint" the finger
2582   HeapWord* finger = _finger;
2583 
2584   // _heap_end will not change underneath our feet; it only changes at
2585   // yield points.
2586   while (finger < _heap_end) {
2587     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2588 
2589     // Note on how this code handles humongous regions. In the
2590     // normal case the finger will reach the start of a "starts
2591     // humongous" (SH) region. Its end will either be the end of the
2592     // last "continues humongous" (CH) region in the sequence, or the
2593     // standard end of the SH region (if the SH is the only region in
2594     // the sequence). That way claim_region() will skip over the CH
2595     // regions. However, there is a subtle race between a CM thread
2596     // executing this method and a mutator thread doing a humongous
2597     // object allocation. The two are not mutually exclusive as the CM
2598     // thread does not need to hold the Heap_lock when it gets
2599     // here. So there is a chance that claim_region() will come across
2600     // a free region that's in the process of becoming a SH or a CH
2601     // region. In the former case, it will either
2602     //   a) Miss the update to the region's end, in which case it will
2603     //      visit every subsequent CH region, will find their bitmaps
2604     //      empty, and do nothing, or
2605     //   b) Will observe the update of the region's end (in which case
2606     //      it will skip the subsequent CH regions).
2607     // If it comes across a region that suddenly becomes CH, the
2608     // scenario will be similar to b). So, the race between
2609     // claim_region() and a humongous object allocation might force us
2610     // to do a bit of unnecessary work (due to some unnecessary bitmap
2611     // iterations) but it should not introduce any correctness issues.
2612     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2613 
2614     // Above heap_region_containing_raw may return NULL as we always scan
2615     // until the end of the heap. In this case, just jump to the next region.
2616     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2617 
2618     // Is the gap between reading the finger and doing the CAS too long?
2619     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2620     if (res == finger && curr_region != NULL) {
2621       // we succeeded
2622       HeapWord* bottom = curr_region->bottom();
2623       HeapWord* limit = curr_region->next_top_at_mark_start();
2624 
2625       if (verbose_low()) {
2626         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2627                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2628                                "limit = " PTR_FORMAT,
2629                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2630       }
2631 
2632       // notice that _finger == end cannot be guaranteed here since
2633       // someone else might have moved the finger even further
2634       assert(_finger >= end, "the finger should have moved forward");
2635 
2636       if (verbose_low()) {
2637         gclog_or_tty->print_cr("[%u] we were successful with region = "
2638                                PTR_FORMAT, worker_id, p2i(curr_region));
2639       }
2640 
2641       if (limit > bottom) {
2642         if (verbose_low()) {
2643           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2644                                  "returning it ", worker_id, p2i(curr_region));
2645         }
2646         return curr_region;
2647       } else {
2648         assert(limit == bottom,
2649                "the region limit should be at bottom");
2650         if (verbose_low()) {
2651           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2652                                  "returning NULL", worker_id, p2i(curr_region));
2653         }
2654         // we return NULL and the caller should try calling
2655         // claim_region() again.
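        // A hypothetical caller would thus retry roughly like this
        // (CMTask::do_marking_step() contains the real claiming loop):
        //
        //   HeapRegion* hr = NULL;
        //   while (hr == NULL && cm->finger() < heap_end) {
        //     hr = cm->claim_region(worker_id);  // NULL => empty region, retry
        //   }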
2656 return NULL; 2657 } 2658 } else { 2659 assert(_finger > finger, "the finger should have moved forward"); 2660 if (verbose_low()) { 2661 if (curr_region == NULL) { 2662 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, " 2663 "global finger = " PTR_FORMAT ", " 2664 "our finger = " PTR_FORMAT, 2665 worker_id, p2i(_finger), p2i(finger)); 2666 } else { 2667 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2668 "global finger = " PTR_FORMAT ", " 2669 "our finger = " PTR_FORMAT, 2670 worker_id, p2i(_finger), p2i(finger)); 2671 } 2672 } 2673 2674 // read it again 2675 finger = _finger; 2676 } 2677 } 2678 2679 return NULL; 2680 } 2681 2682 #ifndef PRODUCT 2683 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2684 private: 2685 G1CollectedHeap* _g1h; 2686 const char* _phase; 2687 int _info; 2688 2689 public: 2690 VerifyNoCSetOops(const char* phase, int info = -1) : 2691 _g1h(G1CollectedHeap::heap()), 2692 _phase(phase), 2693 _info(info) 2694 { } 2695 2696 void operator()(oop obj) const { 2697 guarantee(obj->is_oop(), 2698 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2699 p2i(obj), _phase, _info); 2700 guarantee(!_g1h->obj_in_cs(obj), 2701 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2702 p2i(obj), _phase, _info); 2703 } 2704 }; 2705 2706 void ConcurrentMark::verify_no_cset_oops() { 2707 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2708 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2709 return; 2710 } 2711 2712 // Verify entries on the global mark stack 2713 _markStack.iterate(VerifyNoCSetOops("Stack")); 2714 2715 // Verify entries on the task queues 2716 for (uint i = 0; i < _max_worker_id; ++i) { 2717 CMTaskQueue* queue = _task_queues->queue(i); 2718 queue->iterate(VerifyNoCSetOops("Queue", i)); 2719 } 2720 2721 // Verify the global finger 2722 HeapWord* global_finger = finger(); 2723 if (global_finger != NULL && global_finger < _heap_end) { 2724 // The global finger always points to a heap region boundary. We 2725 // use heap_region_containing_raw() to get the containing region 2726 // given that the global finger could be pointing to a free region 2727 // which subsequently becomes continues humongous. If that 2728 // happens, heap_region_containing() will return the bottom of the 2729 // corresponding starts humongous region and the check below will 2730 // not hold any more. 2731 // Since we always iterate over all regions, we might get a NULL HeapRegion 2732 // here. 2733 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2734 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2735 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2736 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2737 } 2738 2739 // Verify the task fingers 2740 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2741 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2742 CMTask* task = _tasks[i]; 2743 HeapWord* task_finger = task->finger(); 2744 if (task_finger != NULL && task_finger < _heap_end) { 2745 // See above note on the global finger verification. 
2746       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2747       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2748                 !task_hr->in_collection_set(),
2749                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2750                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2751     }
2752   }
2753 }
2754 #endif // PRODUCT
2755 
2756 // Aggregate the counting data that was constructed concurrently
2757 // with marking.
2758 class AggregateCountDataHRClosure: public HeapRegionClosure {
2759   G1CollectedHeap* _g1h;
2760   ConcurrentMark* _cm;
2761   CardTableModRefBS* _ct_bs;
2762   BitMap* _cm_card_bm;
2763   uint _max_worker_id;
2764 
2765  public:
2766   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2767                               BitMap* cm_card_bm,
2768                               uint max_worker_id) :
2769     _g1h(g1h), _cm(g1h->concurrent_mark()),
2770     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2771     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2772 
2773   bool doHeapRegion(HeapRegion* hr) {
2774     if (hr->is_continues_humongous()) {
2775       // We will ignore these here and process them when their
2776       // associated "starts humongous" region is processed.
2777       // Note that we cannot rely on their associated
2778       // "starts humongous" region to have their bit set to 1
2779       // since, due to the region chunking in the parallel region
2780       // iteration, a "continues humongous" region might be visited
2781       // before its associated "starts humongous".
2782       return false;
2783     }
2784 
2785     HeapWord* start = hr->bottom();
2786     HeapWord* limit = hr->next_top_at_mark_start();
2787     HeapWord* end = hr->end();
2788 
2789     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2790            "Preconditions not met - "
2791            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2792            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2793            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2794 
2795     assert(hr->next_marked_bytes() == 0, "Precondition");
2796 
2797     if (start == limit) {
2798       // NTAMS of this region has not been set so nothing to do.
2799       return false;
2800     }
2801 
2802     // 'start' should be in the heap.
2803     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2804     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2805     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2806 
2807     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2808     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2809     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2810 
2811     // If ntams is not card aligned then we bump the card bitmap index
2812     // for limit so that we get all the cards spanned by
2813     // the object ending at ntams.
2814     // Note: if this is the last region in the heap then ntams
2815     // could actually be just beyond the end of the heap;
2816     // limit_idx will then correspond to a (non-existent) card
2817     // that is also outside the heap.
2818     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2819       limit_idx += 1;
2820     }
2821 
2822     assert(limit_idx <= end_idx, "or else use atomics");
2823 
2824     // Aggregate the "stripe" in the count data associated with hr.
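    // The aggregation below can be summarized, for this region r with
    // card range [start_idx, limit_idx), as
    //
    //   marked_bytes(r)  = sum over workers w of marked_bytes_array_w[r]
    //   global card bm  |= union over workers w of task_card_bm_w,
    //                      restricted to [start_idx, limit_idx)
    //
    // i.e. per-worker byte counts are summed and per-worker card bitmaps
    // are OR-ed into the single global card bitmap.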
2825 uint hrm_index = hr->hrm_index(); 2826 size_t marked_bytes = 0; 2827 2828 for (uint i = 0; i < _max_worker_id; i += 1) { 2829 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2830 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2831 2832 // Fetch the marked_bytes in this region for task i and 2833 // add it to the running total for this region. 2834 marked_bytes += marked_bytes_array[hrm_index]; 2835 2836 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 2837 // into the global card bitmap. 2838 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2839 2840 while (scan_idx < limit_idx) { 2841 assert(task_card_bm->at(scan_idx) == true, "should be"); 2842 _cm_card_bm->set_bit(scan_idx); 2843 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2844 2845 // BitMap::get_next_one_offset() can handle the case when 2846 // its left_offset parameter is greater than its right_offset 2847 // parameter. It does, however, have an early exit if 2848 // left_offset == right_offset. So let's limit the value 2849 // passed in for left offset here. 2850 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2851 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2852 } 2853 } 2854 2855 // Update the marked bytes for this region. 2856 hr->add_to_marked_bytes(marked_bytes); 2857 2858 // Next heap region 2859 return false; 2860 } 2861 }; 2862 2863 class G1AggregateCountDataTask: public AbstractGangTask { 2864 protected: 2865 G1CollectedHeap* _g1h; 2866 ConcurrentMark* _cm; 2867 BitMap* _cm_card_bm; 2868 uint _max_worker_id; 2869 uint _active_workers; 2870 HeapRegionClaimer _hrclaimer; 2871 2872 public: 2873 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2874 ConcurrentMark* cm, 2875 BitMap* cm_card_bm, 2876 uint max_worker_id, 2877 uint n_workers) : 2878 AbstractGangTask("Count Aggregation"), 2879 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2880 _max_worker_id(max_worker_id), 2881 _active_workers(n_workers), 2882 _hrclaimer(_active_workers) { 2883 } 2884 2885 void work(uint worker_id) { 2886 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2887 2888 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2889 } 2890 }; 2891 2892 2893 void ConcurrentMark::aggregate_count_data() { 2894 uint n_workers = _g1h->workers()->active_workers(); 2895 2896 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2897 _max_worker_id, n_workers); 2898 2899 _g1h->workers()->run_task(&g1_par_agg_task); 2900 } 2901 2902 // Clear the per-worker arrays used to store the per-region counting data 2903 void ConcurrentMark::clear_all_count_data() { 2904 // Clear the global card bitmap - it will be filled during 2905 // liveness count aggregation (during remark) and the 2906 // final counting task. 2907 _card_bm.clear(); 2908 2909 // Clear the global region bitmap - it will be filled as part 2910 // of the final counting task. 
2911 _region_bm.clear(); 2912 2913 uint max_regions = _g1h->max_regions(); 2914 assert(_max_worker_id > 0, "uninitialized"); 2915 2916 for (uint i = 0; i < _max_worker_id; i += 1) { 2917 BitMap* task_card_bm = count_card_bitmap_for(i); 2918 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2919 2920 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2921 assert(marked_bytes_array != NULL, "uninitialized"); 2922 2923 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2924 task_card_bm->clear(); 2925 } 2926 } 2927 2928 void ConcurrentMark::print_stats() { 2929 if (verbose_stats()) { 2930 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2931 for (size_t i = 0; i < _active_tasks; ++i) { 2932 _tasks[i]->print_stats(); 2933 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2934 } 2935 } 2936 } 2937 2938 // abandon current marking iteration due to a Full GC 2939 void ConcurrentMark::abort() { 2940 if (!cmThread()->during_cycle() || _has_aborted) { 2941 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2942 return; 2943 } 2944 2945 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2946 // concurrent bitmap clearing. 2947 _nextMarkBitMap->clearAll(); 2948 2949 // Note we cannot clear the previous marking bitmap here 2950 // since VerifyDuringGC verifies the objects marked during 2951 // a full GC against the previous bitmap. 2952 2953 // Clear the liveness counting data 2954 clear_all_count_data(); 2955 // Empty mark stack 2956 reset_marking_state(); 2957 for (uint i = 0; i < _max_worker_id; ++i) { 2958 _tasks[i]->clear_region_fields(); 2959 } 2960 _first_overflow_barrier_sync.abort(); 2961 _second_overflow_barrier_sync.abort(); 2962 _has_aborted = true; 2963 2964 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2965 satb_mq_set.abandon_partial_marking(); 2966 // This can be called either during or outside marking, we'll read 2967 // the expected_active value from the SATB queue set. 2968 satb_mq_set.set_active_all_threads( 2969 false, /* new active value */ 2970 satb_mq_set.is_active() /* expected_active */); 2971 2972 _g1h->trace_heap_after_concurrent_cycle(); 2973 _g1h->register_concurrent_cycle_end(); 2974 } 2975 2976 static void print_ms_time_info(const char* prefix, const char* name, 2977 NumberSeq& ns) { 2978 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2979 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2980 if (ns.num() > 0) { 2981 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2982 prefix, ns.sd(), ns.maximum()); 2983 } 2984 } 2985 2986 void ConcurrentMark::print_summary_info() { 2987 gclog_or_tty->print_cr(" Concurrent marking:"); 2988 print_ms_time_info(" ", "init marks", _init_times); 2989 print_ms_time_info(" ", "remarks", _remark_times); 2990 { 2991 print_ms_time_info(" ", "final marks", _remark_mark_times); 2992 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2993 2994 } 2995 print_ms_time_info(" ", "cleanups", _cleanup_times); 2996 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2997 _total_counting_time, 2998 (_cleanup_times.num() > 0 ? 
_total_counting_time * 1000.0 / 2999 (double)_cleanup_times.num() 3000 : 0.0)); 3001 if (G1ScrubRemSets) { 3002 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3003 _total_rs_scrub_time, 3004 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3005 (double)_cleanup_times.num() 3006 : 0.0)); 3007 } 3008 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3009 (_init_times.sum() + _remark_times.sum() + 3010 _cleanup_times.sum())/1000.0); 3011 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3012 "(%8.2f s marking).", 3013 cmThread()->vtime_accum(), 3014 cmThread()->vtime_mark_accum()); 3015 } 3016 3017 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3018 _parallel_workers->print_worker_threads_on(st); 3019 } 3020 3021 void ConcurrentMark::print_on_error(outputStream* st) const { 3022 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3023 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3024 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3025 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3026 } 3027 3028 // We take a break if someone is trying to stop the world. 3029 bool ConcurrentMark::do_yield_check(uint worker_id) { 3030 if (SuspendibleThreadSet::should_yield()) { 3031 if (worker_id == 0) { 3032 _g1h->g1_policy()->record_concurrent_pause(); 3033 } 3034 SuspendibleThreadSet::yield(); 3035 return true; 3036 } else { 3037 return false; 3038 } 3039 } 3040 3041 #ifndef PRODUCT 3042 // for debugging purposes 3043 void ConcurrentMark::print_finger() { 3044 gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT, 3045 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3046 for (uint i = 0; i < _max_worker_id; ++i) { 3047 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3048 } 3049 gclog_or_tty->cr(); 3050 } 3051 #endif 3052 3053 // Closure for iteration over bitmaps 3054 class CMBitMapClosure : public BitMapClosure { 3055 private: 3056 // the bitmap that is being iterated over 3057 CMBitMap* _nextMarkBitMap; 3058 ConcurrentMark* _cm; 3059 CMTask* _task; 3060 3061 public: 3062 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3063 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3064 3065 bool do_bit(size_t offset) { 3066 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3067 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3068 assert( addr < _cm->finger(), "invariant"); 3069 assert(addr >= _task->finger(), "invariant"); 3070 3071 // We move that task's local finger along. 
3072 _task->move_finger_to(addr); 3073 3074 _task->scan_object(oop(addr)); 3075 // we only partially drain the local queue and global stack 3076 _task->drain_local_queue(true); 3077 _task->drain_global_stack(true); 3078 3079 // if the has_aborted flag has been raised, we need to bail out of 3080 // the iteration 3081 return !_task->has_aborted(); 3082 } 3083 }; 3084 3085 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 3086 ReferenceProcessor* result = NULL; 3087 if (G1UseConcMarkReferenceProcessing) { 3088 result = g1h->ref_processor_cm(); 3089 assert(result != NULL, "should not be NULL"); 3090 } 3091 return result; 3092 } 3093 3094 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3095 ConcurrentMark* cm, 3096 CMTask* task) 3097 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 3098 _g1h(g1h), _cm(cm), _task(task) 3099 { } 3100 3101 void CMTask::setup_for_region(HeapRegion* hr) { 3102 assert(hr != NULL, 3103 "claim_region() should have filtered out NULL regions"); 3104 assert(!hr->is_continues_humongous(), 3105 "claim_region() should have filtered out continues humongous regions"); 3106 3107 if (_cm->verbose_low()) { 3108 gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT, 3109 _worker_id, p2i(hr)); 3110 } 3111 3112 _curr_region = hr; 3113 _finger = hr->bottom(); 3114 update_region_limit(); 3115 } 3116 3117 void CMTask::update_region_limit() { 3118 HeapRegion* hr = _curr_region; 3119 HeapWord* bottom = hr->bottom(); 3120 HeapWord* limit = hr->next_top_at_mark_start(); 3121 3122 if (limit == bottom) { 3123 if (_cm->verbose_low()) { 3124 gclog_or_tty->print_cr("[%u] found an empty region " 3125 "[" PTR_FORMAT ", " PTR_FORMAT ")", 3126 _worker_id, p2i(bottom), p2i(limit)); 3127 } 3128 // The region was collected underneath our feet. 3129 // We set the finger to bottom to ensure that the bitmap 3130 // iteration that will follow this will not do anything. 3131 // (this is not a condition that holds when we set the region up, 3132 // as the region is not supposed to be empty in the first place) 3133 _finger = bottom; 3134 } else if (limit >= _region_limit) { 3135 assert(limit >= _finger, "peace of mind"); 3136 } else { 3137 assert(limit < _region_limit, "only way to get here"); 3138 // This can happen under some pretty unusual circumstances. An 3139 // evacuation pause empties the region underneath our feet (NTAMS 3140 // at bottom). We then do some allocation in the region (NTAMS 3141 // stays at bottom), followed by the region being used as a GC 3142 // alloc region (NTAMS will move to top() and the objects 3143 // originally below it will be grayed). All objects now marked in 3144 // the region are explicitly grayed, if below the global finger, 3145 // and we do not need in fact to scan anything else. So, we simply 3146 // set _finger to be limit to ensure that the bitmap iteration 3147 // doesn't do anything. 3148 _finger = limit; 3149 } 3150 3151 _region_limit = limit; 3152 } 3153 3154 void CMTask::giveup_current_region() { 3155 assert(_curr_region != NULL, "invariant"); 3156 if (_cm->verbose_low()) { 3157 gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT, 3158 _worker_id, p2i(_curr_region)); 3159 } 3160 clear_region_fields(); 3161 } 3162 3163 void CMTask::clear_region_fields() { 3164 // Values for these three fields that indicate that we're not 3165 // holding on to a region. 
3166   _curr_region   = NULL;
3167   _finger        = NULL;
3168   _region_limit  = NULL;
3169 }
3170
3171 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3172   if (cm_oop_closure == NULL) {
3173     assert(_cm_oop_closure != NULL, "invariant");
3174   } else {
3175     assert(_cm_oop_closure == NULL, "invariant");
3176   }
3177   _cm_oop_closure = cm_oop_closure;
3178 }
3179
3180 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3181   guarantee(nextMarkBitMap != NULL, "invariant");
3182
3183   if (_cm->verbose_low()) {
3184     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3185   }
3186
3187   _nextMarkBitMap = nextMarkBitMap;
3188   clear_region_fields();
3189
3190   _calls                     = 0;
3191   _elapsed_time_ms           = 0.0;
3192   _termination_time_ms       = 0.0;
3193   _termination_start_time_ms = 0.0;
3194 }
3195
3196 bool CMTask::should_exit_termination() {
3197   regular_clock_call();
3198   // This is called when we are in the termination protocol. We should
3199   // quit if, for some reason, this task wants to abort or the global
3200   // stack is not empty (this means that we can get work from it).
3201   return !_cm->mark_stack_empty() || has_aborted();
3202 }
3203
3204 void CMTask::reached_limit() {
3205   assert(_words_scanned >= _words_scanned_limit ||
3206          _refs_reached >= _refs_reached_limit,
3207          "shouldn't have been called otherwise");
3208   regular_clock_call();
3209 }
3210
3211 void CMTask::regular_clock_call() {
3212   if (has_aborted()) return;
3213
3214   // First, we need to recalculate the words scanned and refs reached
3215   // limits for the next clock call.
3216   recalculate_limits();
3217
3218   // During the regular clock call we do the following:
3219
3220   // (1) If an overflow has been flagged, then we abort.
3221   if (_cm->has_overflown()) {
3222     set_has_aborted();
3223     return;
3224   }
3225
3226   // If we are not concurrent (i.e. we're doing remark) we don't need
3227   // to check anything else. The other steps are only needed during
3228   // the concurrent marking phase.
3229   if (!concurrent()) return;
3230
3231   // (2) If marking has been aborted for Full GC, then we also abort.
3232   if (_cm->has_aborted()) {
3233     set_has_aborted();
3234     return;
3235   }
3236
3237   double curr_time_ms = os::elapsedVTime() * 1000.0;
3238
3239   // (3) We check whether we should yield. If we have to, then we abort.
3240   if (SuspendibleThreadSet::should_yield()) {
3241     // We should yield. To do this we abort the task. The caller is
3242     // responsible for yielding.
3243     set_has_aborted();
3244     return;
3245   }
3246
3247   // (4) We check whether we've reached our time quota. If we have,
3248   // then we abort.
3249   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3250   if (elapsed_time_ms > _time_target_ms) {
3251     set_has_aborted();
3252     _has_timed_out = true;
3253     return;
3254   }
3255
3256   // (5) Finally, we check whether there are enough completed SATB
3257   // buffers available for processing. If there are, we abort.
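  // (The buffers themselves are not processed here: the task aborts,
  // and drain_satb_buffers() picks them up at the beginning of the
  // next do_marking_step() invocation -- see the comment block above
  // do_marking_step().)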
3258   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3259   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3260     if (_cm->verbose_low()) {
3261       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3262                              _worker_id);
3263     }
3264     // we do need to process SATB buffers; we'll abort and restart
3265     // the marking task to do so
3266     set_has_aborted();
3267     return;
3268   }
3269 }
3270
3271 void CMTask::recalculate_limits() {
3272   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3273   _words_scanned_limit      = _real_words_scanned_limit;
3274
3275   _real_refs_reached_limit  = _refs_reached + refs_reached_period;
3276   _refs_reached_limit       = _real_refs_reached_limit;
3277 }
3278
3279 void CMTask::decrease_limits() {
3280   // This is called when we believe that we're going to do an infrequent
3281   // operation which will increase the per-byte scanning cost (i.e. move
3282   // entries to/from the global stack). It basically tries to decrease the
3283   // scanning limit so that the clock is called earlier.
3284
3285   if (_cm->verbose_medium()) {
3286     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3287   }
3288
3289   _words_scanned_limit = _real_words_scanned_limit -
3290     3 * words_scanned_period / 4;
3291   _refs_reached_limit  = _real_refs_reached_limit -
3292     3 * refs_reached_period / 4;
3293 }
3294
3295 void CMTask::move_entries_to_global_stack() {
3296   // local array where we'll store the entries that will be popped
3297   // from the local queue
3298   oop buffer[global_stack_transfer_size];
3299
3300   int n = 0;
3301   oop obj;
3302   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3303     buffer[n] = obj;
3304     ++n;
3305   }
3306
3307   if (n > 0) {
3308     // we popped at least one entry from the local queue
3309
3310     if (!_cm->mark_stack_push(buffer, n)) {
3311       if (_cm->verbose_low()) {
3312         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3313                                _worker_id);
3314       }
3315       set_has_aborted();
3316     } else {
3317       // the transfer was successful
3318
3319       if (_cm->verbose_medium()) {
3320         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3321                                _worker_id, n);
3322       }
3323     }
3324   }
3325
3326   // this operation was quite expensive, so decrease the limits
3327   decrease_limits();
3328 }
3329
3330 void CMTask::get_entries_from_global_stack() {
3331   // local array where we'll store the entries that will be popped
3332   // from the global stack.
3333   oop buffer[global_stack_transfer_size];
3334   int n;
3335   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3336   assert(n <= global_stack_transfer_size,
3337          "we should not pop more than the given limit");
3338   if (n > 0) {
3339     // yes, we did actually pop at least one entry
3340     if (_cm->verbose_medium()) {
3341       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3342                              _worker_id, n);
3343     }
3344     for (int i = 0; i < n; ++i) {
3345       bool success = _task_queue->push(buffer[i]);
3346       // We only call this when the local queue is empty or under a
3347       // given target limit. So, we do not expect this push to fail.
3348       assert(success, "invariant");
3349     }
3350   }
3351
3352   // this operation was quite expensive, so decrease the limits
3353   decrease_limits();
3354 }
3355
3356 void CMTask::drain_local_queue(bool partially) {
3357   if (has_aborted()) return;
3358
3359   // Decide what the target size is, depending on whether we're going to
3360   // drain it partially (so that other tasks can steal if they run out
3361   // of things to do) or totally (at the very end).
3362   size_t target_size;
3363   if (partially) {
3364     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3365   } else {
3366     target_size = 0;
3367   }
3368
3369   if (_task_queue->size() > target_size) {
3370     if (_cm->verbose_high()) {
3371       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3372                              _worker_id, target_size);
3373     }
3374
3375     oop obj;
3376     bool ret = _task_queue->pop_local(obj);
3377     while (ret) {
3378       if (_cm->verbose_high()) {
3379         gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
3380                                p2i((void*) obj));
3381       }
3382
3383       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3384       assert(!_g1h->is_on_master_free_list(
3385                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3386
3387       scan_object(obj);
3388
3389       if (_task_queue->size() <= target_size || has_aborted()) {
3390         ret = false;
3391       } else {
3392         ret = _task_queue->pop_local(obj);
3393       }
3394     }
3395
3396     if (_cm->verbose_high()) {
3397       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3398                              _worker_id, _task_queue->size());
3399     }
3400   }
3401 }
3402
3403 void CMTask::drain_global_stack(bool partially) {
3404   if (has_aborted()) return;
3405
3406   // We have a policy to drain the local queue before we attempt to
3407   // drain the global stack.
3408   assert(partially || _task_queue->size() == 0, "invariant");
3409
3410   // Decide what the target size is, depending on whether we're going to
3411   // drain it partially (so that other tasks can steal if they run out
3412   // of things to do) or totally (at the very end). Notice that,
3413   // because we move entries from the global stack in chunks or
3414   // because another task might be doing the same, we might in fact
3415   // drop below the target. But this is not a problem.
3416   size_t target_size;
3417   if (partially) {
3418     target_size = _cm->partial_mark_stack_size_target();
3419   } else {
3420     target_size = 0;
3421   }
3422
3423   if (_cm->mark_stack_size() > target_size) {
3424     if (_cm->verbose_low()) {
3425       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3426                              _worker_id, target_size);
3427     }
3428
3429     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3430       get_entries_from_global_stack();
3431       drain_local_queue(partially);
3432     }
3433
3434     if (_cm->verbose_low()) {
3435       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3436                              _worker_id, _cm->mark_stack_size());
3437     }
3438   }
3439 }
3440
3441 // The SATB queue code makes several assumptions about whether to call the
3442 // par or non-par versions of the methods. This is why some of the code is
3443 // replicated. We should really get rid of the single-threaded version
3444 // of the code to simplify things.
3445 void CMTask::drain_satb_buffers() {
3446   if (has_aborted()) return;
3447
3448   // We set this so that the regular clock knows that we're in the
3449   // middle of draining buffers and doesn't set the abort flag when it
3450   // notices that SATB buffers are available for draining. It'd be
3451   // very counterproductive if it did that. :-)
3452   _draining_satb_buffers = true;
3453
3454   CMSATBBufferClosure satb_cl(this, _g1h);
3455   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3456
3457   // This keeps claiming and applying the closure to completed buffers
3458   // until we run out of buffers or we need to abort.
3459   while (!has_aborted() &&
3460          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3461     if (_cm->verbose_medium()) {
3462       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3463     }
3464     regular_clock_call();
3465   }
3466
3467   _draining_satb_buffers = false;
3468
3469   assert(has_aborted() ||
3470          concurrent() ||
3471          satb_mq_set.completed_buffers_num() == 0, "invariant");
3472
3473   // again, this was a potentially expensive operation, so decrease the
3474   // limits to get the regular clock call earlier
3475   decrease_limits();
3476 }
3477
3478 void CMTask::print_stats() {
3479   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3480                          _worker_id, _calls);
3481   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3482                          _elapsed_time_ms, _termination_time_ms);
3483   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3484                          _step_times_ms.num(), _step_times_ms.avg(),
3485                          _step_times_ms.sd());
3486   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3487                          _step_times_ms.maximum(), _step_times_ms.sum());
3488 }
3489
3490 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3491   return _task_queues->steal(worker_id, hash_seed, obj);
3492 }
3493
3494 /*****************************************************************************
3495
3496     The do_marking_step(time_target_ms, ...) method is the building
3497     block of the parallel marking framework. It can be called in parallel
3498     with other invocations of do_marking_step() on different tasks
3499     (but only one per task, obviously) and concurrently with the
3500     mutator threads, or during remark; hence it eliminates the need
3501     for two versions of the code. When called during remark, it will
3502     pick up from where the task left off during the concurrent marking
3503     phase. Interestingly, tasks are also claimable during evacuation
3504     pauses, since do_marking_step() ensures that it aborts before
3505     it needs to yield.
3506
3507     The data structures that it uses to do marking work are the
3508     following:
3509
3510     (1) Marking Bitmap. If there are gray objects that appear only
3511     on the bitmap (this happens either when dealing with an overflow
3512     or when the initial marking phase has simply marked the roots
3513     and didn't push them on the stack), then tasks claim heap
3514     regions whose bitmap they then scan to find gray objects. A
3515     global finger indicates where the end of the last claimed region
3516     is. A local finger indicates how far into the region a task has
3517     scanned. The two fingers are used to determine how to gray an
3518     object (i.e. whether simply marking it is OK, as it will be
3519     visited by a task in the future, or whether it needs to be also
3520     pushed on a stack).
3521
3522     (2) Local Queue. The task's local queue, which it can access
3523     reasonably efficiently. Other tasks can steal from
3524     it when they run out of work. Throughout the marking phase, a
3525     task attempts to keep its local queue short but not totally
3526     empty, so that entries are available for stealing by other
3527     tasks. Only when there is no more work does a task totally
3528     drain its local queue.
3529
3530     (3) Global Mark Stack. This handles local queue overflow. During
3531     marking, only sets of entries are moved between it and the local
3532     queues, as access to it requires a mutex and finer-grained
3533     interaction with it might cause contention. If it
3534     overflows, then the marking phase should restart and iterate
3535     over the bitmap to identify gray objects. Throughout the marking
3536     phase, tasks attempt to keep the global mark stack at a small
3537     length but not totally empty, so that entries are available for
3538     popping by other tasks. Only when there is no more work do tasks
3539     totally drain the global mark stack.
3540
3541     (4) SATB Buffer Queue. This is where completed SATB buffers are
3542     made available. Buffers are regularly removed from this queue
3543     and scanned for roots, so that the queue doesn't get too
3544     long. During remark, all completed buffers are processed, as
3545     well as the filled-in parts of any uncompleted buffers.
3546
3547     The do_marking_step() method tries to abort when the time target
3548     has been reached. There are a few other cases when the
3549     do_marking_step() method also aborts:
3550
3551     (1) When the marking phase has been aborted (after a Full GC).
3552
3553     (2) When a global overflow (on the global stack) has been
3554     triggered. Before the task aborts, it will actually sync up with
3555     the other tasks to ensure that all the marking data structures
3556     (local queues, stacks, fingers etc.) are re-initialized so that
3557     when do_marking_step() completes, the marking phase can
3558     immediately restart.
3559
3560     (3) When enough completed SATB buffers are available. The
3561     do_marking_step() method only tries to drain SATB buffers right
3562     at the beginning. So, if enough buffers are available, the
3563     marking step aborts and the SATB buffers are processed at
3564     the beginning of the next invocation.
3565
3566     (4) To yield. When we have to yield, we abort and yield
3567     right at the end of do_marking_step(). This saves us a lot
3568     of hassle, as yielding might allow a Full GC. If that
3569     happens, objects will be compacted underneath our feet, the
3570     heap might shrink, etc. We avoid having to check for this by just
3571     aborting and doing the yield right at the end.
3572
3573     From the above it follows that the do_marking_step() method should
3574     be called in a loop (or, otherwise, regularly) until it completes.
3575
3576     If a marking step completes without its has_aborted() flag being
3577     true, it means it has completed the current marking phase (and
3578     also that all other marking tasks have done so and have all synced up).
3579
3580     A method called regular_clock_call() is invoked "regularly" (in
3581     sub-ms intervals) throughout marking. It is this clock method that
3582     checks all the abort conditions which were mentioned above and
3583     decides when the task should abort. A work-based scheme is used to
3584     trigger this clock method: it is invoked when the number of object
3585     words the marking phase has scanned or the number of references the
3586     marking phase has visited reaches a given limit. Additional invocations
3587     of the clock method have been planted in a few other strategic places
3588     too. The initial reason for the clock method was to avoid calling
3589     vtime too regularly, as it is quite expensive. So, once it was in
3590     place, it was natural to piggy-back all the other conditions on it
3591     too and not constantly check them throughout the code.
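    As an illustrative sketch only (the driver shape, the variable
    names and the 10.0 ms target below are assumptions for exposition,
    not the actual callers), the "call it in a loop" contract looks
    roughly like:

      CMTask* task = ...;        // a claimed, set-up task
      ConcurrentMark* cm = ...;  // the owning ConcurrentMark
      do {
        task->do_marking_step(10.0  /* time target in ms */,
                              true  /* do_termination */,
                              false /* is_serial */);
        // An aborted step hit one of the conditions above (time
        // quota, yield request, overflow, pending SATB buffers).
      } while (task->has_aborted() && !cm->has_aborted());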
3592
3593     If do_termination is true then do_marking_step will enter its
3594     termination protocol.
3595
3596     The value of is_serial must be true when do_marking_step is being
3597     called serially (i.e. by the VMThread); in that case do_marking_step
3598     should skip any synchronization in the termination and overflow code.
3599     Examples include the serial remark code and the serial reference
3600     processing closures.
3601
3602     The value of is_serial must be false when do_marking_step is
3603     being called by any of the worker threads in a work gang.
3604     Examples include the concurrent marking code (CMMarkingTask),
3605     the MT remark code, and the MT reference processing closures.
3606
3607  *****************************************************************************/
3608
3609 void CMTask::do_marking_step(double time_target_ms,
3610                              bool do_termination,
3611                              bool is_serial) {
3612   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3613   assert(concurrent() == _cm->concurrent(), "they should be the same");
3614
3615   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3616   assert(_task_queues != NULL, "invariant");
3617   assert(_task_queue != NULL, "invariant");
3618   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3619
3620   assert(!_claimed,
3621          "only one thread should claim this task at any one time");
3622
3623   // OK, this doesn't safeguard against all possible scenarios, as it is
3624   // possible for two threads to set the _claimed flag at the same
3625   // time. But it is only for debugging purposes anyway and it will
3626   // catch most problems.
3627   _claimed = true;
3628
3629   _start_time_ms = os::elapsedVTime() * 1000.0;
3630
3631   // If do_stealing is true then do_marking_step will attempt to
3632   // steal work from the other CMTasks. It only makes sense to
3633   // enable stealing when the termination protocol is enabled
3634   // and do_marking_step() is not being called serially.
3635   bool do_stealing = do_termination && !is_serial;
3636
3637   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3638   _time_target_ms = time_target_ms - diff_prediction_ms;
3639
3640   // set up the variables that are used in the work-based scheme to
3641   // call the regular clock method
3642   _words_scanned = 0;
3643   _refs_reached  = 0;
3644   recalculate_limits();
3645
3646   // clear all flags
3647   clear_has_aborted();
3648   _has_timed_out = false;
3649   _draining_satb_buffers = false;
3650
3651   ++_calls;
3652
3653   if (_cm->verbose_low()) {
3654     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3655                            "target = %1.2lfms >>>>>>>>>>",
3656                            _worker_id, _calls, _time_target_ms);
3657   }
3658
3659   // Set up the bitmap and oop closures. Anything that uses them is
3660   // eventually called from this method, so it is OK to allocate these
3661   // statically.
3662   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3663   G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
3664   set_cm_oop_closure(&cm_oop_closure);
3665
3666   if (_cm->has_overflown()) {
3667     // This can happen if the mark stack overflows during a GC pause
3668     // and this task, after a yield point, restarts. We have to abort
3669     // as we need to get into the overflow protocol which happens
3670     // right at the end of this task.
3671     set_has_aborted();
3672   }
3673
3674   // First drain any available SATB buffers. After this, we will not
3675   // look at SATB buffers before the next invocation of this method.
3676   // If enough completed SATB buffers are queued up, the regular clock
3677   // will abort this task so that it restarts.
3678   drain_satb_buffers();
3679   // ...then partially drain the local queue and the global stack
3680   drain_local_queue(true);
3681   drain_global_stack(true);
3682
3683   do {
3684     if (!has_aborted() && _curr_region != NULL) {
3685       // This means that we're already holding on to a region.
3686       assert(_finger != NULL, "if region is not NULL, then the finger "
3687              "should not be NULL either");
3688
3689       // We might have restarted this task after an evacuation pause
3690       // which might have evacuated the region we're holding on to
3691       // underneath our feet. Let's read its limit again to make sure
3692       // that we do not iterate over a region of the heap that
3693       // contains garbage (update_region_limit() will also move
3694       // _finger to the start of the region if it is found empty).
3695       update_region_limit();
3696       // We will start from _finger not from the start of the region,
3697       // as we might be restarting this task after aborting half-way
3698       // through scanning this region. In this case, _finger points to
3699       // the address where we last found a marked object. If this is a
3700       // fresh region, _finger points to start().
3701       MemRegion mr = MemRegion(_finger, _region_limit);
3702
3703       if (_cm->verbose_low()) {
3704         gclog_or_tty->print_cr("[%u] we're scanning part "
3705                                "[" PTR_FORMAT ", " PTR_FORMAT ") "
3706                                "of region " HR_FORMAT,
3707                                _worker_id, p2i(_finger), p2i(_region_limit),
3708                                HR_FORMAT_PARAMS(_curr_region));
3709       }
3710
3711       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3712              "humongous regions should go around loop once only");
3713
3714       // Some special cases:
3715       // If the memory region is empty, we can just give up the region.
3716       // If the current region is humongous then we only need to check
3717       // the bitmap for the bit associated with the start of the object,
3718       // scan the object if it's live, and give up the region.
3719       // Otherwise, let's iterate over the bitmap of the part of the region
3720       // that is left.
3721       // If the iteration is successful, give up the region.
3722       if (mr.is_empty()) {
3723         giveup_current_region();
3724         regular_clock_call();
3725       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3726         if (_nextMarkBitMap->isMarked(mr.start())) {
3727           // The object is marked - apply the closure
3728           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3729           bitmap_closure.do_bit(offset);
3730         }
3731         // Even if this task aborted while scanning the humongous object
3732         // we can (and should) give up the current region.
3733         giveup_current_region();
3734         regular_clock_call();
3735       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3736         giveup_current_region();
3737         regular_clock_call();
3738       } else {
3739         assert(has_aborted(), "currently the only way to do so");
3740         // The only way to abort the bitmap iteration is to return
3741         // false from the do_bit() method. However, inside the
3742         // do_bit() method we move the _finger to point to the
3743         // object currently being looked at. So, if we bail out, we
3744         // have definitely set _finger to something non-null.
3745         assert(_finger != NULL, "invariant");
3746
3747         // Region iteration was actually aborted. So now _finger
3748         // points to the address of the object we last scanned. If we
3749         // leave it there, when we restart this task, we will rescan
3750         // the object. It is easy to avoid this. We move the finger by
3751         // enough to point to the next possible object header (the
3752         // bitmap knows by how much we need to move it as it knows its
3753         // granularity).
3754         assert(_finger < _region_limit, "invariant");
3755         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3756         // Check if bitmap iteration was aborted while scanning the last object
3757         if (new_finger >= _region_limit) {
3758           giveup_current_region();
3759         } else {
3760           move_finger_to(new_finger);
3761         }
3762       }
3763     }
3764     // At this point we have either completed iterating over the
3765     // region we were holding on to, or we have aborted.
3766
3767     // We then partially drain the local queue and the global stack.
3768     // (Do we really need this?)
3769     drain_local_queue(true);
3770     drain_global_stack(true);
3771
3772     // Read the note on the claim_region() method about why it might
3773     // return NULL with potentially more regions available for
3774     // claiming and why we have to check out_of_regions() to determine
3775     // whether we're done or not.
3776     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3777       // We are going to try to claim a new region. We should have
3778       // given up on the previous one.
3779       // Separated the asserts so that we know which one fires.
3780       assert(_curr_region  == NULL, "invariant");
3781       assert(_finger       == NULL, "invariant");
3782       assert(_region_limit == NULL, "invariant");
3783       if (_cm->verbose_low()) {
3784         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
3785       }
3786       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3787       if (claimed_region != NULL) {
3788         // Yes, we managed to claim one
3789         if (_cm->verbose_low()) {
3790           gclog_or_tty->print_cr("[%u] we successfully claimed "
3791                                  "region " PTR_FORMAT,
3792                                  _worker_id, p2i(claimed_region));
3793         }
3794
3795         setup_for_region(claimed_region);
3796         assert(_curr_region == claimed_region, "invariant");
3797       }
3798       // It is important to call the regular clock here. It might take
3799       // a while to claim a region if, for example, we hit a large
3800       // block of empty regions. So we need to call the regular clock
3801       // method once round the loop to make sure it's called
3802       // frequently enough.
3803       regular_clock_call();
3804     }
3805
3806     if (!has_aborted() && _curr_region == NULL) {
3807       assert(_cm->out_of_regions(),
3808              "at this point we should be out of regions");
3809     }
3810   } while (_curr_region != NULL && !has_aborted());
3811
3812   if (!has_aborted()) {
3813     // We cannot check whether the global stack is empty, since other
3814     // tasks might be pushing objects to it concurrently.
3815     assert(_cm->out_of_regions(),
3816            "at this point we should be out of regions");
3817
3818     if (_cm->verbose_low()) {
3819       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
3820     }
3821
3822     // Try to reduce the number of available SATB buffers so that
3823     // remark has less work to do.
3824     drain_satb_buffers();
3825   }
3826
3827   // Since we've done everything else, we can now totally drain the
3828   // local queue and global stack.
3829   drain_local_queue(false);
3830   drain_global_stack(false);
3831
3832   // Attempt work stealing from other tasks' queues.
3833   if (do_stealing && !has_aborted()) {
3834     // We have not aborted. This means that we have finished all that
3835     // we could. Let's try to do some stealing...
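    // (try_stealing() forwards to the task queue set which, as far as
    // the generic task queue code goes, picks victim queues
    // pseudo-randomly using this task's _hash_seed.)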
3836
3837     // We cannot check whether the global stack is empty, since other
3838     // tasks might be pushing objects to it concurrently.
3839     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3840            "only way to reach here");
3841
3842     if (_cm->verbose_low()) {
3843       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
3844     }
3845
3846     while (!has_aborted()) {
3847       oop obj;
3848       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3849         if (_cm->verbose_medium()) {
3850           gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
3851                                  _worker_id, p2i((void*) obj));
3852         }
3853
3854         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3855                "any stolen object should be marked");
3856         scan_object(obj);
3857
3858         // And since we're towards the end, let's totally drain the
3859         // local queue and global stack.
3860         drain_local_queue(false);
3861         drain_global_stack(false);
3862       } else {
3863         break;
3864       }
3865     }
3866   }
3867
3868   // If we are about to wrap up and go into termination, check if we
3869   // should raise the overflow flag.
3870   if (do_termination && !has_aborted()) {
3871     if (_cm->force_overflow()->should_force()) {
3872       _cm->set_has_overflown();
3873       regular_clock_call();
3874     }
3875   }
3876
3877   // We still haven't aborted. Now, let's try to get into the
3878   // termination protocol.
3879   if (do_termination && !has_aborted()) {
3880     // We cannot check whether the global stack is empty, since other
3881     // tasks might be concurrently pushing objects on it.
3882     // Separated the asserts so that we know which one fires.
3883     assert(_cm->out_of_regions(), "only way to reach here");
3884     assert(_task_queue->size() == 0, "only way to reach here");
3885
3886     if (_cm->verbose_low()) {
3887       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
3888     }
3889
3890     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3891
3892     // The CMTask class also extends the TerminatorTerminator class,
3893     // hence its should_exit_termination() method will also decide
3894     // whether to exit the termination protocol or not.
3895     bool finished = (is_serial ||
3896                      _cm->terminator()->offer_termination(this));
3897     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3898     _termination_time_ms +=
3899       termination_end_time_ms - _termination_start_time_ms;
3900
3901     if (finished) {
3902       // We're all done.
3903
3904       if (_worker_id == 0) {
3905         // let's allow task 0 to do this
3906         if (concurrent()) {
3907           assert(_cm->concurrent_marking_in_progress(), "invariant");
3908           // we need to set this to false before the next
3909           // safepoint. This way we ensure that the marking phase
3910           // doesn't observe any more heap expansions.
3911           _cm->clear_concurrent_marking_in_progress();
3912         }
3913       }
3914
3915       // We can now guarantee that the global stack is empty, since
3916       // all other tasks have finished. We separated the guarantees so
3917       // that, if a condition is false, we can immediately find out
3918       // which one.
3919       guarantee(_cm->out_of_regions(), "only way to reach here");
3920       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3921       guarantee(_task_queue->size() == 0, "only way to reach here");
3922       guarantee(!_cm->has_overflown(), "only way to reach here");
3923       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3924
3925       if (_cm->verbose_low()) {
3926         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
3927       }
3928     } else {
3929       // Apparently there's more work to do. Let's abort this task. The caller
3930       // will restart it and we can hopefully find more things to do.
3931
3932       if (_cm->verbose_low()) {
3933         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
3934                                _worker_id);
3935       }
3936
3937       set_has_aborted();
3938     }
3939   }
3940
3941   // Mainly for debugging purposes, to make sure that a pointer to the
3942   // closure which was statically allocated in this frame doesn't
3943   // escape it by accident.
3944   set_cm_oop_closure(NULL);
3945   double end_time_ms = os::elapsedVTime() * 1000.0;
3946   double elapsed_time_ms = end_time_ms - _start_time_ms;
3947   // Update the step history.
3948   _step_times_ms.add(elapsed_time_ms);
3949
3950   if (has_aborted()) {
3951     // The task was aborted for some reason.
3952     if (_has_timed_out) {
3953       double diff_ms = elapsed_time_ms - _time_target_ms;
3954       // Keep statistics of how well we did with respect to hitting
3955       // our target only if we actually timed out (if we aborted for
3956       // other reasons, then the results might get skewed).
3957       _marking_step_diffs_ms.add(diff_ms);
3958     }
3959
3960     if (_cm->has_overflown()) {
3961       // This is the interesting one. We aborted because a global
3962       // overflow was raised. This means we have to restart the
3963       // marking phase and start iterating over regions. However, in
3964       // order to do this we have to make sure that all tasks stop
3965       // what they are doing and re-initialize in a safe manner. We
3966       // will achieve this with the use of two barrier sync points.
3967
3968       if (_cm->verbose_low()) {
3969         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
3970       }
3971
3972       if (!is_serial) {
3973         // We only need to enter the sync barrier if being called
3974         // from a parallel context
3975         _cm->enter_first_sync_barrier(_worker_id);
3976
3977         // When we exit this sync barrier we know that all tasks have
3978         // stopped doing marking work. So, it's now safe to
3979         // re-initialize our data structures. At the end of this method,
3980         // task 0 will clear the global data structures.
3981       }
3982
3983       // We clear the local state of this task...
3984       clear_region_fields();
3985
3986       if (!is_serial) {
3987         // ...and enter the second barrier.
3988         _cm->enter_second_sync_barrier(_worker_id);
3989       }
3990       // At this point, if we're in the concurrent phase of
3991       // marking, everything has been re-initialized and we're
3992       // ready to restart.
3993     }
3994
3995     if (_cm->verbose_low()) {
3996       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3997                              "elapsed = %1.2lfms <<<<<<<<<<",
3998                              _worker_id, _time_target_ms, elapsed_time_ms);
3999       if (_cm->has_aborted()) {
4000         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4001                                _worker_id);
4002       }
4003     }
4004   } else {
4005     if (_cm->verbose_low()) {
4006       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4007                              "elapsed = %1.2lfms <<<<<<<<<<",
4008                              _worker_id, _time_target_ms, elapsed_time_ms);
4009     }
4010   }
4011
4012   _claimed = false;
4013 }
4014
4015 CMTask::CMTask(uint worker_id,
4016                ConcurrentMark* cm,
4017                size_t* marked_bytes,
4018                BitMap* card_bm,
4019                CMTaskQueue* task_queue,
4020                CMTaskQueueSet* task_queues)
4021   : _g1h(G1CollectedHeap::heap()),
4022     _worker_id(worker_id), _cm(cm),
4023     _claimed(false),
4024     _nextMarkBitMap(NULL), _hash_seed(17),
4025     _task_queue(task_queue),
4026     _task_queues(task_queues),
4027     _cm_oop_closure(NULL),
4028     _marked_bytes_array(marked_bytes),
4029     _card_bm(card_bm) {
4030   guarantee(task_queue != NULL, "invariant");
4031   guarantee(task_queues != NULL, "invariant");
4032
4033   _marking_step_diffs_ms.add(0.5);
4034 }
4035
4036 // These are formatting macros that are used below to ensure
4037 // consistent formatting. The *_H_* versions are used to format the
4038 // header for a particular value and they should be kept consistent
4039 // with the corresponding macro. Also note that most of the macros add
4040 // the necessary white space (as a prefix), which makes them a bit
4041 // easier to compose.
4042
4043 // All the output lines are prefixed with this string to be able to
4044 // identify them easily in a large log file.
4045 #define G1PPRL_LINE_PREFIX            "###"
4046
4047 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
4048 #ifdef _LP64
4049 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4050 #else // _LP64
4051 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4052 #endif // _LP64
4053
4054 // For per-region info
4055 #define G1PPRL_TYPE_FORMAT            " %-4s"
4056 #define G1PPRL_TYPE_H_FORMAT          " %4s"
4057 #define G1PPRL_BYTE_FORMAT            " " SIZE_FORMAT_W(9)
4058 #define G1PPRL_BYTE_H_FORMAT          " %9s"
4059 #define G1PPRL_DOUBLE_FORMAT          " %14.1f"
4060 #define G1PPRL_DOUBLE_H_FORMAT        " %14s"
4061
4062 // For summary info
4063 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
4064 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
4065 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
4066 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
4067
4068 G1PrintRegionLivenessInfoClosure::
4069 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4070   : _out(out),
4071     _total_used_bytes(0), _total_capacity_bytes(0),
4072     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4073     _hum_used_bytes(0), _hum_capacity_bytes(0),
4074     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4075     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4076   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4077   MemRegion g1_reserved = g1h->g1_reserved();
4078   double now = os::elapsedTime();
4079
4080   // Print the header of the output.
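  // For orientation, the header printed below has roughly this shape
  // (the phase name and values are illustrative, not from a real run):
  //   ### PHASE <phase_name> @ 12.345
  //   ### HEAP  reserved: 0x...-0x...  region-size: 1048576
  // followed by the per-region column headers.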
4081   _out->cr();
4082   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4083   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4084                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4085                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4086                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4087                  HeapRegion::GrainBytes);
4088   _out->print_cr(G1PPRL_LINE_PREFIX);
4089   _out->print_cr(G1PPRL_LINE_PREFIX
4090                  G1PPRL_TYPE_H_FORMAT
4091                  G1PPRL_ADDR_BASE_H_FORMAT
4092                  G1PPRL_BYTE_H_FORMAT
4093                  G1PPRL_BYTE_H_FORMAT
4094                  G1PPRL_BYTE_H_FORMAT
4095                  G1PPRL_DOUBLE_H_FORMAT
4096                  G1PPRL_BYTE_H_FORMAT
4097                  G1PPRL_BYTE_H_FORMAT,
4098                  "type", "address-range",
4099                  "used", "prev-live", "next-live", "gc-eff",
4100                  "remset", "code-roots");
4101   _out->print_cr(G1PPRL_LINE_PREFIX
4102                  G1PPRL_TYPE_H_FORMAT
4103                  G1PPRL_ADDR_BASE_H_FORMAT
4104                  G1PPRL_BYTE_H_FORMAT
4105                  G1PPRL_BYTE_H_FORMAT
4106                  G1PPRL_BYTE_H_FORMAT
4107                  G1PPRL_DOUBLE_H_FORMAT
4108                  G1PPRL_BYTE_H_FORMAT
4109                  G1PPRL_BYTE_H_FORMAT,
4110                  "", "",
4111                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4112                  "(bytes)", "(bytes)");
4113 }
4114
4115 // It takes as a parameter a reference to one of the _hum_* fields; it
4116 // deduces the corresponding value for a region in a humongous region
4117 // series (either the region size, or what's left if the _hum_* field
4118 // is < the region size), and updates the _hum_* field accordingly.
4119 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4120   size_t bytes = 0;
4121   // The > 0 check is to deal with the prev and next live bytes, which
4122   // could be 0.
4123   if (*hum_bytes > 0) {
4124     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4125     *hum_bytes -= bytes;
4126   }
4127   return bytes;
4128 }
4129
4130 // It deduces the values for a region in a humongous region series
4131 // from the _hum_* fields and updates those accordingly. It assumes
4132 // that the _hum_* fields have already been set up from the "starts
4133 // humongous" region and that we visit the regions in address order.
4134 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4135                                                      size_t* capacity_bytes,
4136                                                      size_t* prev_live_bytes,
4137                                                      size_t* next_live_bytes) {
4138   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4139   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4140   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4141   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4142   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4143 }
4144
4145 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4146   const char* type       = r->get_type_str();
4147   HeapWord* bottom       = r->bottom();
4148   HeapWord* end          = r->end();
4149   size_t capacity_bytes  = r->capacity();
4150   size_t used_bytes      = r->used();
4151   size_t prev_live_bytes = r->live_bytes();
4152   size_t next_live_bytes = r->next_live_bytes();
4153   double gc_eff          = r->gc_efficiency();
4154   size_t remset_bytes    = r->rem_set()->mem_size();
4155   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4156
4157   if (r->is_starts_humongous()) {
4158     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4159            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4160            "they should have been zeroed after the last time we used them");
4161     // Set up the _hum_* fields.
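    // The "starts humongous" region carries the totals for the whole
    // humongous series; get_hum_bytes() then doles them out, one
    // region-sized slice at a time, as doHeapRegion() visits the
    // series in address order.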
4162     _hum_capacity_bytes  = capacity_bytes;
4163     _hum_used_bytes      = used_bytes;
4164     _hum_prev_live_bytes = prev_live_bytes;
4165     _hum_next_live_bytes = next_live_bytes;
4166     get_hum_bytes(&used_bytes, &capacity_bytes,
4167                   &prev_live_bytes, &next_live_bytes);
4168     end = bottom + HeapRegion::GrainWords;
4169   } else if (r->is_continues_humongous()) {
4170     get_hum_bytes(&used_bytes, &capacity_bytes,
4171                   &prev_live_bytes, &next_live_bytes);
4172     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4173   }
4174
4175   _total_used_bytes      += used_bytes;
4176   _total_capacity_bytes  += capacity_bytes;
4177   _total_prev_live_bytes += prev_live_bytes;
4178   _total_next_live_bytes += next_live_bytes;
4179   _total_remset_bytes    += remset_bytes;
4180   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4181
4182   // Print a line for this particular region.
4183   _out->print_cr(G1PPRL_LINE_PREFIX
4184                  G1PPRL_TYPE_FORMAT
4185                  G1PPRL_ADDR_BASE_FORMAT
4186                  G1PPRL_BYTE_FORMAT
4187                  G1PPRL_BYTE_FORMAT
4188                  G1PPRL_BYTE_FORMAT
4189                  G1PPRL_DOUBLE_FORMAT
4190                  G1PPRL_BYTE_FORMAT
4191                  G1PPRL_BYTE_FORMAT,
4192                  type, p2i(bottom), p2i(end),
4193                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4194                  remset_bytes, strong_code_roots_bytes);
4195
4196   return false;
4197 }
4198
4199 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4200   // Add static memory usage to the remembered set sizes.
4201   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4202   // Print the footer of the output.
4203   _out->print_cr(G1PPRL_LINE_PREFIX);
4204   _out->print_cr(G1PPRL_LINE_PREFIX
4205                  " SUMMARY"
4206                  G1PPRL_SUM_MB_FORMAT("capacity")
4207                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4208                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4209                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4210                  G1PPRL_SUM_MB_FORMAT("remset")
4211                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4212                  bytes_to_mb(_total_capacity_bytes),
4213                  bytes_to_mb(_total_used_bytes),
4214                  perc(_total_used_bytes, _total_capacity_bytes),
4215                  bytes_to_mb(_total_prev_live_bytes),
4216                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4217                  bytes_to_mb(_total_next_live_bytes),
4218                  perc(_total_next_live_bytes, _total_capacity_bytes),
4219                  bytes_to_mb(_total_remset_bytes),
4220                  bytes_to_mb(_total_strong_code_roots_bytes));
4221   _out->cr();
4222 }
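// Usage sketch (hedged; the phase name and output stream below are
// illustrative, not a specific caller): the constructor prints the
// header, doHeapRegion() prints one line per region, and the
// destructor prints the summary footer.
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);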