/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
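  // (A note on granularity: one bit in this bitmap covers
  // HeapWordSize << _shifter bytes of heap. Assuming the usual 8-byte
  // minimum object alignment on a 64-bit VM, _shifter is 0 and each bit
  // covers exactly one heap word, so the rounding below yields the first
  // address at or after addr that the bitmap can represent at all.)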
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  assert(limit != NULL, "limit must not be NULL");
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer _hrclaimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
}

void CMBitMap::clearRange(MemRegion mr) {
  // Clip the range to the part of the heap this bitmap covers.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    // Failed to double capacity, continue with the current stack.
    log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                  _capacity / K, new_capacity / K);
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
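  // What follows is, in effect, a double-checked claim: _next_survivor is
  // read once without the lock as a cheap fast path, then re-read under
  // RootRegionScan_lock before being advanced, so two threads can never
  // hand out the same survivor region.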
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor          = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
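    // Only a command-line MarkStackSize needs checking here, against
    // either the default MarkStackSizeMax or one that was also given on
    // the command line; an ergonomically calculated size was already
    // validated in the branch above.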
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs.
 * This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);

      log_info(gc)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial */);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    log_develop_trace(gc)("Remark led to restart for overflow.");

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
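      // Worked example, assuming the default 512-byte G1 cards: an object
      // occupying [1024, 2056) gets start_idx 2 and end_idx 4 (2056 >> 9),
      // yet it touches cards 2, 3 and 4; the card-unaligned end below bumps
      // end_idx to 5 so the half-open range [2, 5) covers all three cards.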
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // This will happen if we are handling a humongous object that spans
      // several heap regions.
      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm; // Region BM to be verified
  BitMap* _card_bm;   // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    if (exp_marked_bytes > act_marked_bytes) {
      if (hr->is_starts_humongous()) {
        // For start_humongous regions, the size of the whole object will be
        // in exp_marked_bytes.
        HeapRegion* region = hr;
        int num_regions;
        for (num_regions = 0; region != NULL; num_regions++) {
          region = _g1h->next_region_in_humongous(region);
        }
        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
          failures += 1;
        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
          failures += 1;
        }
      } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting some objects during the actual marking.
        failures += 1;
      }
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
 public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             end_idx, _card_bm->size());
      assert(start_idx < _card_bm->size(),
             "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             start_idx, _card_bm->size());

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;
  HeapRegionClaimer _hrclaimer;

 public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
  }
};

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HeapRegionSetCount _old_regions_removed;
  HeapRegionSetCount _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

 public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(),
    _humongous_regions_removed(),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
  const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed.increment(1u, hr->capacity());
        _g1->free_humongous_region(hr, _local_cleanup_list, true);
      } else {
        _old_regions_removed.increment(1u, hr->capacity());
        _g1->free_region(hr, _local_cleanup_list, true);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

 protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

 public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we cannot guarantee that we will
      // only generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
 protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
  HeapRegionClaimer _hrclaimer;

 public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
      AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
  }

};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
1609   if (has_aborted()) {
1610     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1611     return;
1612   }
1613
1614   g1h->verify_region_sets_optional();
1615
1616   if (VerifyDuringGC) {
1617     HandleMark hm;  // handle scope
1618     g1h->prepare_for_verify();
1619     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1620   }
1621   g1h->check_bitmaps("Cleanup Start");
1622
1623   G1CollectorPolicy* g1p = g1h->g1_policy();
1624   g1p->record_concurrent_mark_cleanup_start();
1625
1626   double start = os::elapsedTime();
1627
1628   HeapRegionRemSet::reset_for_cleanup_tasks();
1629
1630   // Do counting once more with the world stopped for good measure.
1631   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1632
1633   g1h->workers()->run_task(&g1_par_count_task);
1634
1635   if (VerifyDuringGC) {
1636     // Verify that the counting data accumulated during marking matches
1637     // that calculated by walking the marking bitmap.
1638
1639     // Bitmaps to hold expected values
1640     BitMap expected_region_bm(_region_bm.size(), true);
1641     BitMap expected_card_bm(_card_bm.size(), true);
1642
1643     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1644                                                  &_region_bm,
1645                                                  &_card_bm,
1646                                                  &expected_region_bm,
1647                                                  &expected_card_bm);
1648
1649     g1h->workers()->run_task(&g1_par_verify_task);
1650
1651     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1652   }
1653
1654   size_t start_used_bytes = g1h->used();
1655   g1h->collector_state()->set_mark_in_progress(false);
1656
1657   double count_end = os::elapsedTime();
1658   double this_final_counting_time = (count_end - start);
1659   _total_counting_time += this_final_counting_time;
1660
1661   if (log_is_enabled(Trace, gc, liveness)) {
1662     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1663     _g1h->heap_region_iterate(&cl);
1664   }
1665
1666   // Install newly created mark bitmap as "prev".
1667   swapMarkBitMaps();
1668
1669   g1h->reset_gc_time_stamp();
1670
1671   uint n_workers = _g1h->workers()->active_workers();
1672
1673   // Note end of marking in all heap regions.
1674   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1675   g1h->workers()->run_task(&g1_par_note_end_task);
1676   g1h->check_gc_time_stamps();
1677
1678   if (!cleanup_list_is_empty()) {
1679     // The cleanup list is not empty, so we'll have to process it
1680     // concurrently. Notify anyone else that might be wanting free
1681     // regions that there will be more free regions coming soon.
1682     g1h->set_free_regions_coming();
1683   }
1684
1685   // Scrub the rem sets before the record_concurrent_mark_cleanup_end() call
1686   // below, since it affects the metric by which we sort the heap regions.
1687   if (G1ScrubRemSets) {
1688     double rs_scrub_start = os::elapsedTime();
1689     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1690     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1691
1692     double rs_scrub_end = os::elapsedTime();
1693     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1694     _total_rs_scrub_time += this_rs_scrub_time;
1695   }
1696
1697   // this will also free any regions totally full of garbage objects,
1698   // and sort the regions.
1699   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1700
1701   // Statistics.
1702   double end = os::elapsedTime();
1703   _cleanup_times.add((end - start) * 1000.0);
1704
1705   // Clean up will have freed any regions completely full of garbage.
1706   // Update the soft reference policy with the new heap occupancy.
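  // Universe::update_heap_info_at_gc() caches the heap's capacity and free
  // space. The default soft reference policy (LRUMaxHeapPolicy) derives its
  // clearing threshold from these values - roughly free-memory-in-MB *
  // SoftRefLRUPolicyMSPerMB milliseconds since last access - so it should
  // be based on the post-cleanup occupancy.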
1707   Universe::update_heap_info_at_gc();
1708
1709   if (VerifyDuringGC) {
1710     HandleMark hm;  // handle scope
1711     g1h->prepare_for_verify();
1712     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1713   }
1714
1715   g1h->check_bitmaps("Cleanup End");
1716
1717   g1h->verify_region_sets_optional();
1718
1719   // We need to make this be a "collection" so any collection pause that
1720   // races with it goes around and waits for completeCleanup to finish.
1721   g1h->increment_total_collections();
1722
1723   // Clean out dead classes and update Metaspace sizes.
1724   if (ClassUnloadingWithConcurrentMark) {
1725     ClassLoaderDataGraph::purge();
1726   }
1727   MetaspaceGC::compute_new_size();
1728
1729   // We reclaimed old regions so we should calculate the sizes to make
1730   // sure we update the old gen/space data.
1731   g1h->g1mm()->update_sizes();
1732   g1h->allocation_context_stats().update_after_mark();
1733
1734   g1h->trace_heap_after_concurrent_cycle();
1735 }
1736
1737 void ConcurrentMark::completeCleanup() {
1738   if (has_aborted()) return;
1739
1740   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1741
1742   _cleanup_list.verify_optional();
1743   FreeRegionList tmp_free_list("Tmp Free List");
1744
1745   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1746                                   "cleanup list has %u entries",
1747                                   _cleanup_list.length());
1748
1749   // No one else should be accessing the _cleanup_list at this point,
1750   // so it is not necessary to take any locks
1751   while (!_cleanup_list.is_empty()) {
1752     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1753     assert(hr != NULL, "Got NULL from a non-empty list");
1754     hr->par_clear();
1755     tmp_free_list.add_ordered(hr);
1756
1757     // Instead of adding one region at a time to the secondary_free_list,
1758     // we accumulate them in the local list and move them a few at a
1759     // time. This also cuts down on the number of notify_all() calls
1760     // we do during this process. We'll also append the local list when
1761     // _cleanup_list is empty (which means we just removed the last
1762     // region from the _cleanup_list).
1763     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1764         _cleanup_list.is_empty()) {
1765       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1766                                       "appending %u entries to the secondary_free_list, "
1767                                       "cleanup list still has %u entries",
1768                                       tmp_free_list.length(),
1769                                       _cleanup_list.length());
1770
1771       {
1772         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1773         g1h->secondary_free_list_add(&tmp_free_list);
1774         SecondaryFreeList_lock->notify_all();
1775       }
1776 #ifndef PRODUCT
1777       if (G1StressConcRegionFreeing) {
1778         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1779           os::sleep(Thread::current(), (jlong) 1, false);
1780         }
1781       }
1782 #endif
1783     }
1784   }
1785   assert(tmp_free_list.is_empty(), "post-condition");
1786 }
1787
1788 // Supporting Object and Oop closures for reference discovery
1789 // and processing during marking
1790
1791 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1792   HeapWord* addr = (HeapWord*)obj;
1793   return addr != NULL &&
1794          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1795 }
1796
1797 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1798 // Uses the CMTask associated with a worker thread (for serial reference
1799 // processing the CMTask for worker 0 is used) to preserve (mark) and
1800 // trace referent objects.
1801 //
1802 // Using the CMTask and embedded local queues avoids having the worker
1803 // threads operating on the global mark stack. This reduces the risk
1804 // of overflowing the stack - which we would rather avoid at this late
1805 // stage. Also using the tasks' local queues removes the potential
1806 // of the workers interfering with each other that could occur if
1807 // operating on the global stack.
1808
1809 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1810   ConcurrentMark* _cm;
1811   CMTask* _task;
1812   int _ref_counter_limit;
1813   int _ref_counter;
1814   bool _is_serial;
1815 public:
1816   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1817     _cm(cm), _task(task), _is_serial(is_serial),
1818     _ref_counter_limit(G1RefProcDrainInterval) {
1819     assert(_ref_counter_limit > 0, "sanity");
1820     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1821     _ref_counter = _ref_counter_limit;
1822   }
1823
1824   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1825   virtual void do_oop(      oop* p) { do_oop_work(p); }
1826
1827   template <class T> void do_oop_work(T* p) {
1828     if (!_cm->has_overflown()) {
1829       oop obj = oopDesc::load_decode_heap_oop(p);
1830       _task->deal_with_reference(obj);
1831       _ref_counter--;
1832
1833       if (_ref_counter == 0) {
1834         // We have dealt with _ref_counter_limit references, pushing them
1835         // and objects reachable from them on to the local stack (and
1836         // possibly the global stack). Call CMTask::do_marking_step() to
1837         // process these entries.
1838         //
1839         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1840         // there's nothing more to do (i.e. we're done with the entries that
1841         // were pushed as a result of the CMTask::deal_with_reference() calls
1842         // above) or we overflow.
1843         //
1844         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1845         // flag while there may still be some work to do. (See the comment at
1846         // the beginning of CMTask::do_marking_step() for those conditions -
1847         // one of which is reaching the specified time target.) It is only
1848         // when CMTask::do_marking_step() returns without setting the
1849         // has_aborted() flag that the marking step has completed.
1850         do {
1851           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1852           _task->do_marking_step(mark_step_duration_ms,
1853                                  false      /* do_termination */,
1854                                  _is_serial);
1855         } while (_task->has_aborted() && !_cm->has_overflown());
1856         _ref_counter = _ref_counter_limit;
1857       }
1858     }
1859   }
1860 };
1861
1862 // 'Drain' oop closure used by both serial and parallel reference processing.
1863 // Uses the CMTask associated with a given worker thread (for serial
1864 // reference processing the CMTask for worker 0 is used). Calls the
1865 // do_marking_step routine, with an unbelievably large timeout value,
1866 // to drain the marking data structures of the remaining entries
1867 // added by the 'keep alive' oop closure above.
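//
// A rough sketch (not code in this file) of how ReferenceProcessor
// applies the two closures: for each discovered reference whose referent
// must be preserved it invokes
//
//   keep_alive->do_oop(referent_addr);  // G1CMKeepAliveAndDrainClosure
//
// and once a batch of references has been handled it invokes
//
//   complete_gc->do_void();             // G1CMDrainMarkingStackClosure
//
// which is expected to leave the marking stacks drained (unless the
// global stack overflows or the task aborts).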
1868
1869 class G1CMDrainMarkingStackClosure: public VoidClosure {
1870   ConcurrentMark* _cm;
1871   CMTask* _task;
1872   bool _is_serial;
1873 public:
1874   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1875     _cm(cm), _task(task), _is_serial(is_serial) {
1876     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1877   }
1878
1879   void do_void() {
1880     do {
1881       // We call CMTask::do_marking_step() to completely drain the local
1882       // and global marking stacks of entries pushed by the 'keep alive'
1883       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1884       //
1885       // CMTask::do_marking_step() is called in a loop, which we'll exit
1886       // if there's nothing more to do (i.e. we've completely drained the
1887       // entries that were pushed as a result of applying the 'keep alive'
1888       // closure to the entries on the discovered ref lists) or we overflow
1889       // the global marking stack.
1890       //
1891       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1892       // flag while there may still be some work to do. (See the comment at
1893       // the beginning of CMTask::do_marking_step() for those conditions -
1894       // one of which is reaching the specified time target.) It is only
1895       // when CMTask::do_marking_step() returns without setting the
1896       // has_aborted() flag that the marking step has completed.
1897
1898       _task->do_marking_step(1000000000.0 /* something very large */,
1899                              true         /* do_termination */,
1900                              _is_serial);
1901     } while (_task->has_aborted() && !_cm->has_overflown());
1902   }
1903 };
1904
1905 // Implementation of AbstractRefProcTaskExecutor for parallel
1906 // reference processing at the end of G1 concurrent marking
1907
1908 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1909 private:
1910   G1CollectedHeap* _g1h;
1911   ConcurrentMark* _cm;
1912   WorkGang* _workers;
1913   uint _active_workers;
1914
1915 public:
1916   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1917                           ConcurrentMark* cm,
1918                           WorkGang* workers,
1919                           uint n_workers) :
1920     _g1h(g1h), _cm(cm),
1921     _workers(workers), _active_workers(n_workers) { }
1922
1923   // Executes the given task using concurrent marking worker threads.
1924   virtual void execute(ProcessTask& task);
1925   virtual void execute(EnqueueTask& task);
1926 };
1927
1928 class G1CMRefProcTaskProxy: public AbstractGangTask {
1929   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1930   ProcessTask& _proc_task;
1931   G1CollectedHeap* _g1h;
1932   ConcurrentMark* _cm;
1933
1934 public:
1935   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1936                        G1CollectedHeap* g1h,
1937                        ConcurrentMark* cm) :
1938     AbstractGangTask("Process reference objects in parallel"),
1939     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1940     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1941     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1942   }
1943
1944   virtual void work(uint worker_id) {
1945     ResourceMark rm;
1946     HandleMark hm;
1947     CMTask* task = _cm->task(worker_id);
1948     G1CMIsAliveClosure g1_is_alive(_g1h);
1949     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1950     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1951
1952     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1953   }
1954 };
1955
1956 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1957   assert(_workers != NULL, "Need parallel worker threads.");
1958   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1959
1960   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1961
1962   // We need to reset the concurrency level before each
1963   // proxy task execution, so that the termination protocol
1964   // and overflow handling in CMTask::do_marking_step() know
1965   // how many workers to wait for.
1966   _cm->set_concurrency(_active_workers);
1967   _workers->run_task(&proc_task_proxy);
1968 }
1969
1970 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1971   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1972   EnqueueTask& _enq_task;
1973
1974 public:
1975   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1976     AbstractGangTask("Enqueue reference objects in parallel"),
1977     _enq_task(enq_task) { }
1978
1979   virtual void work(uint worker_id) {
1980     _enq_task.work(worker_id);
1981   }
1982 };
1983
1984 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1985   assert(_workers != NULL, "Need parallel worker threads.");
1986   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1987
1988   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1989
1990   // Not strictly necessary but...
1991   //
1992   // We need to reset the concurrency level before each
1993   // proxy task execution, so that the termination protocol
1994   // and overflow handling in CMTask::do_marking_step() know
1995   // how many workers to wait for.
1996   _cm->set_concurrency(_active_workers);
1997   _workers->run_task(&enq_task_proxy);
1998 }
1999
2000 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2001   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2002 }
2003
2004 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2005   if (has_overflown()) {
2006     // Skip processing the discovered references if we have
2007     // overflown the global marking stack. Reference objects
2008     // only get discovered once so it is OK to not
2009     // de-populate the discovered reference lists. We could have,
2010     // but the only benefit would be that, when marking restarts,
2011     // fewer reference objects are discovered.
2012     return;
2013   }
2014
2015   ResourceMark rm;
2016   HandleMark hm;
2017
2018   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2019
2020   // Is alive closure.
2021   G1CMIsAliveClosure g1_is_alive(g1h);
2022
2023   // Inner scope to exclude the cleaning of the string and symbol
2024   // tables from the displayed time.
2025   {
2026     GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
2027
2028     ReferenceProcessor* rp = g1h->ref_processor_cm();
2029
2030     // See the comment in G1CollectedHeap::ref_processing_init()
2031     // about how reference processing currently works in G1.
2032
2033     // Set the soft reference policy
2034     rp->setup_policy(clear_all_soft_refs);
2035     assert(_markStack.isEmpty(), "mark stack should be empty");
2036
2037     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2038     // in serial reference processing. Note these closures are also
2039     // used for serially processing (by the current thread) the
2040     // JNI references during parallel reference processing.
2041     //
2042     // These closures do not need to synchronize with the worker
2043     // threads involved in parallel reference processing as these
2044     // instances are executed serially by the current thread (e.g.
2045     // reference processing is not multi-threaded and is thus
2046     // performed by the current thread instead of a gang worker).
2047     //
2048     // The gang tasks involved in parallel reference processing create
2049     // their own instances of these closures, which do their own
2050     // synchronization among themselves.
2051     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2052     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2053
2054     // We need at least one active thread. If reference processing
2055     // is not multi-threaded we use the current (VMThread) thread,
2056     // otherwise we use the work gang from the G1CollectedHeap and
2057     // we utilize all the worker threads we can.
2058     bool processing_is_mt = rp->processing_is_mt();
2059     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2060     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2061
2062     // Parallel processing task executor.
2063     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2064                                               g1h->workers(), active_workers);
2065     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2066
2067     // Set the concurrency level. The phase was already set prior to
2068     // executing the remark task.
2069     set_concurrency(active_workers);
2070
2071     // Set the degree of MT processing here. If the discovery was done MT,
2072     // the number of threads involved during discovery could differ from
2073     // the number of active workers. This is OK as long as the discovered
2074     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2075     rp->set_active_mt_degree(active_workers);
2076
2077     // Process the weak references.
2078     const ReferenceProcessorStats& stats =
2079         rp->process_discovered_references(&g1_is_alive,
2080                                           &g1_keep_alive,
2081                                           &g1_drain_mark_stack,
2082                                           executor,
2083                                           g1h->gc_timer_cm());
2084     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2085
2086     // The do_oop work routines of the keep_alive and drain_marking_stack
2087     // oop closures will set the has_overflown flag if we overflow the
2088     // global marking stack.
2089 2090 assert(_markStack.overflow() || _markStack.isEmpty(), 2091 "mark stack should be empty (unless it overflowed)"); 2092 2093 if (_markStack.overflow()) { 2094 // This should have been done already when we tried to push an 2095 // entry on to the global mark stack. But let's do it again. 2096 set_has_overflown(); 2097 } 2098 2099 assert(rp->num_q() == active_workers, "why not"); 2100 2101 rp->enqueue_discovered_references(executor); 2102 2103 rp->verify_no_references_recorded(); 2104 assert(!rp->discovery_enabled(), "Post condition"); 2105 } 2106 2107 if (has_overflown()) { 2108 // We can not trust g1_is_alive if the marking stack overflowed 2109 return; 2110 } 2111 2112 assert(_markStack.isEmpty(), "Marking should have completed"); 2113 2114 // Unload Klasses, String, Symbols, Code Cache, etc. 2115 { 2116 GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm()); 2117 2118 if (ClassUnloadingWithConcurrentMark) { 2119 bool purged_classes; 2120 2121 { 2122 GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm()); 2123 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2124 } 2125 2126 { 2127 GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm()); 2128 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2129 } 2130 } 2131 2132 if (G1StringDedup::is_enabled()) { 2133 GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm()); 2134 G1StringDedup::unlink(&g1_is_alive); 2135 } 2136 } 2137 } 2138 2139 void ConcurrentMark::swapMarkBitMaps() { 2140 CMBitMapRO* temp = _prevMarkBitMap; 2141 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2142 _nextMarkBitMap = (CMBitMap*) temp; 2143 } 2144 2145 // Closure for marking entries in SATB buffers. 2146 class CMSATBBufferClosure : public SATBBufferClosure { 2147 private: 2148 CMTask* _task; 2149 G1CollectedHeap* _g1h; 2150 2151 // This is very similar to CMTask::deal_with_reference, but with 2152 // more relaxed requirements for the argument, so this must be more 2153 // circumspect about treating the argument as an object. 2154 void do_entry(void* entry) const { 2155 _task->increment_refs_reached(); 2156 HeapRegion* hr = _g1h->heap_region_containing(entry); 2157 if (entry < hr->next_top_at_mark_start()) { 2158 // Until we get here, we don't know whether entry refers to a valid 2159 // object; it could instead have been a stale reference. 
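      // Entries below NTAMS must be (the start of) objects that were
      // allocated before marking began, so it is safe to treat the entry
      // as an oop here; anything at or above NTAMS is implicitly live
      // during marking and was filtered out by the check above.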
2160       oop obj = static_cast<oop>(entry);
2161       assert(obj->is_oop(true /* ignore mark word */),
2162              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2163       _task->make_reference_grey(obj, hr);
2164     }
2165   }
2166
2167 public:
2168   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2169     : _task(task), _g1h(g1h) { }
2170
2171   virtual void do_buffer(void** buffer, size_t size) {
2172     for (size_t i = 0; i < size; ++i) {
2173       do_entry(buffer[i]);
2174     }
2175   }
2176 };
2177
2178 class G1RemarkThreadsClosure : public ThreadClosure {
2179   CMSATBBufferClosure _cm_satb_cl;
2180   G1CMOopClosure _cm_cl;
2181   MarkingCodeBlobClosure _code_cl;
2182   int _thread_parity;
2183
2184 public:
2185   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2186     _cm_satb_cl(task, g1h),
2187     _cm_cl(g1h, g1h->concurrent_mark(), task),
2188     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2189     _thread_parity(Threads::thread_claim_parity()) {}
2190
2191   void do_thread(Thread* thread) {
2192     if (thread->is_Java_thread()) {
2193       if (thread->claim_oops_do(true, _thread_parity)) {
2194         JavaThread* jt = (JavaThread*)thread;
2195
2196         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2197         // however, the oops reachable from nmethods have very complex lifecycles:
2198         // * Alive if on the stack of an executing method
2199         // * Weakly reachable otherwise
2200         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2201         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2202         jt->nmethods_do(&_code_cl);
2203
2204         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2205       }
2206     } else if (thread->is_VM_thread()) {
2207       if (thread->claim_oops_do(true, _thread_parity)) {
2208         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2209       }
2210     }
2211   }
2212 };
2213
2214 class CMRemarkTask: public AbstractGangTask {
2215 private:
2216   ConcurrentMark* _cm;
2217 public:
2218   void work(uint worker_id) {
2219     // Since all available tasks are actually started, we should
2220     // only proceed if we're supposed to be active.
2221     if (worker_id < _cm->active_tasks()) {
2222       CMTask* task = _cm->task(worker_id);
2223       task->record_start_time();
2224       {
2225         ResourceMark rm;
2226         HandleMark hm;
2227
2228         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2229         Threads::threads_do(&threads_f);
2230       }
2231
2232       do {
2233         task->do_marking_step(1000000000.0 /* something very large */,
2234                               true         /* do_termination */,
2235                               false        /* is_serial */);
2236       } while (task->has_aborted() && !_cm->has_overflown());
2237       // If we overflow, then we do not want to restart. We instead
2238       // want to abort remark and do concurrent marking again.
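      // (When has_overflown() is set, the remark pause itself notices it
      // after the workers finish, and concurrent marking is restarted from
      // scratch rather than resumed in this loop.)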
2239       task->record_end_time();
2240     }
2241   }
2242
2243   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2244     AbstractGangTask("Par Remark"), _cm(cm) {
2245     _cm->terminator()->reset_for_reuse(active_workers);
2246   }
2247 };
2248
2249 void ConcurrentMark::checkpointRootsFinalWork() {
2250   ResourceMark rm;
2251   HandleMark hm;
2252   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2253
2254   GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2255
2256   g1h->ensure_parsability(false);
2257
2258   // this is remark, so we'll use up all active threads
2259   uint active_workers = g1h->workers()->active_workers();
2260   set_concurrency_and_phase(active_workers, false /* concurrent */);
2261   // Leave _parallel_marking_threads at its
2262   // value originally calculated in the ConcurrentMark
2263   // constructor and pass values of the active workers
2264   // through the gang in the task.
2265
2266   {
2267     StrongRootsScope srs(active_workers);
2268
2269     CMRemarkTask remarkTask(this, active_workers);
2270     // We will start all available threads, even if we decide that the
2271     // active_workers will be fewer. The extra ones will just bail out
2272     // immediately.
2273     g1h->workers()->run_task(&remarkTask);
2274   }
2275
2276   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2277   guarantee(has_overflown() ||
2278             satb_mq_set.completed_buffers_num() == 0,
2279             "Invariant: has_overflown = %s, num buffers = %d",
2280             BOOL_TO_STR(has_overflown()),
2281             satb_mq_set.completed_buffers_num());
2282
2283   print_stats();
2284 }
2285
2286 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2287   // Note we are overriding the read-only view of the prev map here, via
2288   // the cast.
2289   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2290 }
2291
2292 HeapRegion*
2293 ConcurrentMark::claim_region(uint worker_id) {
2294   // "checkpoint" the finger
2295   HeapWord* finger = _finger;
2296
2297   // _heap_end will not change underneath our feet; it only changes at
2298   // yield points.
2299   while (finger < _heap_end) {
2300     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2301
2302     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2303
2304     // Above heap_region_containing may return NULL as we always scan and
2305     // claim up to the end of the heap. In this case, just jump to the next region.
2306     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2307
2308     // Is the gap between reading the finger and doing the CAS too long?
2309     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2310     if (res == finger && curr_region != NULL) {
2311       // we succeeded
2312       HeapWord* bottom = curr_region->bottom();
2313       HeapWord* limit = curr_region->next_top_at_mark_start();
2314
2315       // notice that _finger == end cannot be guaranteed here since
2316       // someone else might have moved the finger even further
2317       assert(_finger >= end, "the finger should have moved forward");
2318
2319       if (limit > bottom) {
2320         return curr_region;
2321       } else {
2322         assert(limit == bottom,
2323                "the region limit should be at bottom");
2324         // we return NULL and the caller should try calling
2325         // claim_region() again.
2326 return NULL; 2327 } 2328 } else { 2329 assert(_finger > finger, "the finger should have moved forward"); 2330 // read it again 2331 finger = _finger; 2332 } 2333 } 2334 2335 return NULL; 2336 } 2337 2338 #ifndef PRODUCT 2339 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2340 private: 2341 G1CollectedHeap* _g1h; 2342 const char* _phase; 2343 int _info; 2344 2345 public: 2346 VerifyNoCSetOops(const char* phase, int info = -1) : 2347 _g1h(G1CollectedHeap::heap()), 2348 _phase(phase), 2349 _info(info) 2350 { } 2351 2352 void operator()(oop obj) const { 2353 guarantee(obj->is_oop(), 2354 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2355 p2i(obj), _phase, _info); 2356 guarantee(!_g1h->obj_in_cs(obj), 2357 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2358 p2i(obj), _phase, _info); 2359 } 2360 }; 2361 2362 void ConcurrentMark::verify_no_cset_oops() { 2363 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2364 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2365 return; 2366 } 2367 2368 // Verify entries on the global mark stack 2369 _markStack.iterate(VerifyNoCSetOops("Stack")); 2370 2371 // Verify entries on the task queues 2372 for (uint i = 0; i < _max_worker_id; ++i) { 2373 CMTaskQueue* queue = _task_queues->queue(i); 2374 queue->iterate(VerifyNoCSetOops("Queue", i)); 2375 } 2376 2377 // Verify the global finger 2378 HeapWord* global_finger = finger(); 2379 if (global_finger != NULL && global_finger < _heap_end) { 2380 // Since we always iterate over all regions, we might get a NULL HeapRegion 2381 // here. 2382 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2383 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2384 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2385 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2386 } 2387 2388 // Verify the task fingers 2389 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2390 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2391 CMTask* task = _tasks[i]; 2392 HeapWord* task_finger = task->finger(); 2393 if (task_finger != NULL && task_finger < _heap_end) { 2394 // See above note on the global finger verification. 2395 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2396 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2397 !task_hr->in_collection_set(), 2398 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2399 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2400 } 2401 } 2402 } 2403 #endif // PRODUCT 2404 2405 // Aggregate the counting data that was constructed concurrently 2406 // with marking. 
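//
// Conceptually, for each region R with heap-region-manager index i and
// card bitmap range [start_idx, limit_idx), the closure below computes
//
//   marked_bytes(R)  =  sum over workers w of marked_bytes_array_w[i]
//   global card bm  |=  union over workers w of task_card_bm_w,
//                       restricted to [start_idx, limit_idx)
//
// i.e. the per-worker counting data is folded into one global view.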
2407 class AggregateCountDataHRClosure: public HeapRegionClosure {
2408   G1CollectedHeap* _g1h;
2409   ConcurrentMark* _cm;
2410   CardTableModRefBS* _ct_bs;
2411   BitMap* _cm_card_bm;
2412   uint _max_worker_id;
2413
2414 public:
2415   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2416                               BitMap* cm_card_bm,
2417                               uint max_worker_id) :
2418     _g1h(g1h), _cm(g1h->concurrent_mark()),
2419     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2420     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2421
2422   bool doHeapRegion(HeapRegion* hr) {
2423     HeapWord* start = hr->bottom();
2424     HeapWord* limit = hr->next_top_at_mark_start();
2425     HeapWord* end = hr->end();
2426
2427     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2428            "Preconditions not met - "
2429            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2430            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2431            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2432
2433     assert(hr->next_marked_bytes() == 0, "Precondition");
2434
2435     if (start == limit) {
2436       // NTAMS of this region has not been set so nothing to do.
2437       return false;
2438     }
2439
2440     // 'start' should be in the heap.
2441     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2442     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2443     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2444
2445     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2446     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2447     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2448
2449     // If ntams is not card aligned then we bump the card bitmap index
2450     // for limit so that we get all the cards spanned by
2451     // the object ending at ntams.
2452     // Note: if this is the last region in the heap then ntams
2453     // could actually be just beyond the end of the heap;
2454     // limit_idx will then correspond to a (non-existent) card
2455     // that is also outside the heap.
2456     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2457       limit_idx += 1;
2458     }
2459
2460     assert(limit_idx <= end_idx, "or else use atomics");
2461
2462     // Aggregate the "stripe" in the count data associated with hr.
2463     uint hrm_index = hr->hrm_index();
2464     size_t marked_bytes = 0;
2465
2466     for (uint i = 0; i < _max_worker_id; i += 1) {
2467       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2468       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2469
2470       // Fetch the marked_bytes in this region for task i and
2471       // add it to the running total for this region.
2472       marked_bytes += marked_bytes_array[hrm_index];
2473
2474       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2475       // into the global card bitmap.
2476       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2477
2478       while (scan_idx < limit_idx) {
2479         assert(task_card_bm->at(scan_idx) == true, "should be");
2480         _cm_card_bm->set_bit(scan_idx);
2481         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2482
2483         // BitMap::get_next_one_offset() can handle the case when
2484         // its left_offset parameter is greater than its right_offset
2485         // parameter. It does, however, have an early exit if
2486         // left_offset == right_offset. So let's limit the value
2487         // passed in for left offset here.
2488 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2489 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2490 } 2491 } 2492 2493 // Update the marked bytes for this region. 2494 hr->add_to_marked_bytes(marked_bytes); 2495 2496 // Next heap region 2497 return false; 2498 } 2499 }; 2500 2501 class G1AggregateCountDataTask: public AbstractGangTask { 2502 protected: 2503 G1CollectedHeap* _g1h; 2504 ConcurrentMark* _cm; 2505 BitMap* _cm_card_bm; 2506 uint _max_worker_id; 2507 uint _active_workers; 2508 HeapRegionClaimer _hrclaimer; 2509 2510 public: 2511 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2512 ConcurrentMark* cm, 2513 BitMap* cm_card_bm, 2514 uint max_worker_id, 2515 uint n_workers) : 2516 AbstractGangTask("Count Aggregation"), 2517 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2518 _max_worker_id(max_worker_id), 2519 _active_workers(n_workers), 2520 _hrclaimer(_active_workers) { 2521 } 2522 2523 void work(uint worker_id) { 2524 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2525 2526 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2527 } 2528 }; 2529 2530 2531 void ConcurrentMark::aggregate_count_data() { 2532 uint n_workers = _g1h->workers()->active_workers(); 2533 2534 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2535 _max_worker_id, n_workers); 2536 2537 _g1h->workers()->run_task(&g1_par_agg_task); 2538 } 2539 2540 // Clear the per-worker arrays used to store the per-region counting data 2541 void ConcurrentMark::clear_all_count_data() { 2542 // Clear the global card bitmap - it will be filled during 2543 // liveness count aggregation (during remark) and the 2544 // final counting task. 2545 _card_bm.clear(); 2546 2547 // Clear the global region bitmap - it will be filled as part 2548 // of the final counting task. 2549 _region_bm.clear(); 2550 2551 uint max_regions = _g1h->max_regions(); 2552 assert(_max_worker_id > 0, "uninitialized"); 2553 2554 for (uint i = 0; i < _max_worker_id; i += 1) { 2555 BitMap* task_card_bm = count_card_bitmap_for(i); 2556 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2557 2558 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2559 assert(marked_bytes_array != NULL, "uninitialized"); 2560 2561 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2562 task_card_bm->clear(); 2563 } 2564 } 2565 2566 void ConcurrentMark::print_stats() { 2567 if (!log_is_enabled(Debug, gc, stats)) { 2568 return; 2569 } 2570 log_debug(gc, stats)("---------------------------------------------------------------------"); 2571 for (size_t i = 0; i < _active_tasks; ++i) { 2572 _tasks[i]->print_stats(); 2573 log_debug(gc, stats)("---------------------------------------------------------------------"); 2574 } 2575 } 2576 2577 // abandon current marking iteration due to a Full GC 2578 void ConcurrentMark::abort() { 2579 if (!cmThread()->during_cycle() || _has_aborted) { 2580 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2581 return; 2582 } 2583 2584 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2585 // concurrent bitmap clearing. 2586 _nextMarkBitMap->clearAll(); 2587 2588 // Note we cannot clear the previous marking bitmap here 2589 // since VerifyDuringGC verifies the objects marked during 2590 // a full GC against the previous bitmap. 
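// (The "prev" bitmap describes the last completed marking and is what
// verification reads; it is only replaced via swapMarkBitMaps() at the
// end of a successful cleanup, so aborting must leave it untouched.)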
2591 2592 // Clear the liveness counting data 2593 clear_all_count_data(); 2594 // Empty mark stack 2595 reset_marking_state(); 2596 for (uint i = 0; i < _max_worker_id; ++i) { 2597 _tasks[i]->clear_region_fields(); 2598 } 2599 _first_overflow_barrier_sync.abort(); 2600 _second_overflow_barrier_sync.abort(); 2601 _has_aborted = true; 2602 2603 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2604 satb_mq_set.abandon_partial_marking(); 2605 // This can be called either during or outside marking, we'll read 2606 // the expected_active value from the SATB queue set. 2607 satb_mq_set.set_active_all_threads( 2608 false, /* new active value */ 2609 satb_mq_set.is_active() /* expected_active */); 2610 2611 _g1h->trace_heap_after_concurrent_cycle(); 2612 _g1h->register_concurrent_cycle_end(); 2613 } 2614 2615 static void print_ms_time_info(const char* prefix, const char* name, 2616 NumberSeq& ns) { 2617 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2618 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2619 if (ns.num() > 0) { 2620 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2621 prefix, ns.sd(), ns.maximum()); 2622 } 2623 } 2624 2625 void ConcurrentMark::print_summary_info() { 2626 LogHandle(gc, marking) log; 2627 if (!log.is_trace()) { 2628 return; 2629 } 2630 2631 log.trace(" Concurrent marking:"); 2632 print_ms_time_info(" ", "init marks", _init_times); 2633 print_ms_time_info(" ", "remarks", _remark_times); 2634 { 2635 print_ms_time_info(" ", "final marks", _remark_mark_times); 2636 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2637 2638 } 2639 print_ms_time_info(" ", "cleanups", _cleanup_times); 2640 log.trace(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2641 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2642 if (G1ScrubRemSets) { 2643 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2644 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2645 } 2646 log.trace(" Total stop_world time = %8.2f s.", 2647 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2648 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2649 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2650 } 2651 2652 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2653 _parallel_workers->print_worker_threads_on(st); 2654 } 2655 2656 void ConcurrentMark::print_on_error(outputStream* st) const { 2657 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2658 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2659 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2660 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2661 } 2662 2663 // We take a break if someone is trying to stop the world. 
2664 bool ConcurrentMark::do_yield_check(uint worker_id) {
2665   if (SuspendibleThreadSet::should_yield()) {
2666     if (worker_id == 0) {
2667       _g1h->g1_policy()->record_concurrent_pause();
2668     }
2669     SuspendibleThreadSet::yield();
2670     return true;
2671   } else {
2672     return false;
2673   }
2674 }
2675
2676 // Closure for iteration over bitmaps
2677 class CMBitMapClosure : public BitMapClosure {
2678 private:
2679   // the bitmap that is being iterated over
2680   CMBitMap* _nextMarkBitMap;
2681   ConcurrentMark* _cm;
2682   CMTask* _task;
2683
2684 public:
2685   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2686     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2687
2688   bool do_bit(size_t offset) {
2689     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2690     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2691     assert( addr < _cm->finger(), "invariant");
2692     assert(addr >= _task->finger(), "invariant");
2693
2694     // We move the task's local finger along.
2695     _task->move_finger_to(addr);
2696
2697     _task->scan_object(oop(addr));
2698     // we only partially drain the local queue and global stack
2699     _task->drain_local_queue(true);
2700     _task->drain_global_stack(true);
2701
2702     // if the has_aborted flag has been raised, we need to bail out of
2703     // the iteration
2704     return !_task->has_aborted();
2705   }
2706 };
2707
2708 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2709   ReferenceProcessor* result = NULL;
2710   if (G1UseConcMarkReferenceProcessing) {
2711     result = g1h->ref_processor_cm();
2712     assert(result != NULL, "should not be NULL");
2713   }
2714   return result;
2715 }
2716
2717 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2718                                ConcurrentMark* cm,
2719                                CMTask* task)
2720   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2721     _g1h(g1h), _cm(cm), _task(task)
2722 { }
2723
2724 void CMTask::setup_for_region(HeapRegion* hr) {
2725   assert(hr != NULL,
2726          "claim_region() should have filtered out NULL regions");
2727   _curr_region = hr;
2728   _finger = hr->bottom();
2729   update_region_limit();
2730 }
2731
2732 void CMTask::update_region_limit() {
2733   HeapRegion* hr = _curr_region;
2734   HeapWord* bottom = hr->bottom();
2735   HeapWord* limit = hr->next_top_at_mark_start();
2736
2737   if (limit == bottom) {
2738     // The region was collected underneath our feet.
2739     // We set the finger to bottom to ensure that the bitmap
2740     // iteration that will follow this will not do anything.
2741     // (this is not a condition that holds when we set the region up,
2742     // as the region is not supposed to be empty in the first place)
2743     _finger = bottom;
2744   } else if (limit >= _region_limit) {
2745     assert(limit >= _finger, "peace of mind");
2746   } else {
2747     assert(limit < _region_limit, "only way to get here");
2748     // This can happen under some pretty unusual circumstances. An
2749     // evacuation pause empties the region underneath our feet (NTAMS
2750     // at bottom). We then do some allocation in the region (NTAMS
2751     // stays at bottom), followed by the region being used as a GC
2752     // alloc region (NTAMS will move to top() and the objects
2753     // originally below it will be grayed). All objects now marked in
2754     // the region are explicitly grayed, if below the global finger,
2755     // and in fact we do not need to scan anything else. So, we simply
2756     // set _finger to be limit to ensure that the bitmap iteration
2757     // doesn't do anything.
2758     _finger = limit;
2759   }
2760
2761   _region_limit = limit;
2762 }
2763
2764 void CMTask::giveup_current_region() {
2765   assert(_curr_region != NULL, "invariant");
2766   clear_region_fields();
2767 }
2768
2769 void CMTask::clear_region_fields() {
2770   // Values for these three fields that indicate that we're not
2771   // holding on to a region.
2772   _curr_region = NULL;
2773   _finger = NULL;
2774   _region_limit = NULL;
2775 }
2776
2777 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2778   if (cm_oop_closure == NULL) {
2779     assert(_cm_oop_closure != NULL, "invariant");
2780   } else {
2781     assert(_cm_oop_closure == NULL, "invariant");
2782   }
2783   _cm_oop_closure = cm_oop_closure;
2784 }
2785
2786 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2787   guarantee(nextMarkBitMap != NULL, "invariant");
2788   _nextMarkBitMap = nextMarkBitMap;
2789   clear_region_fields();
2790
2791   _calls = 0;
2792   _elapsed_time_ms = 0.0;
2793   _termination_time_ms = 0.0;
2794   _termination_start_time_ms = 0.0;
2795 }
2796
2797 bool CMTask::should_exit_termination() {
2798   regular_clock_call();
2799   // This is called when we are in the termination protocol. We should
2800   // quit if, for some reason, this task wants to abort or the global
2801   // stack is not empty (this means that we can get work from it).
2802   return !_cm->mark_stack_empty() || has_aborted();
2803 }
2804
2805 void CMTask::reached_limit() {
2806   assert(_words_scanned >= _words_scanned_limit ||
2807          _refs_reached >= _refs_reached_limit,
2808          "shouldn't have been called otherwise");
2809   regular_clock_call();
2810 }
2811
2812 void CMTask::regular_clock_call() {
2813   if (has_aborted()) return;
2814
2815   // First, we need to recalculate the words scanned and refs reached
2816   // limits for the next clock call.
2817   recalculate_limits();
2818
2819   // During the regular clock call we do the following:
2820
2821   // (1) If an overflow has been flagged, then we abort.
2822   if (_cm->has_overflown()) {
2823     set_has_aborted();
2824     return;
2825   }
2826
2827   // If we are not concurrent (i.e. we're doing remark) we don't need
2828   // to check anything else. The other steps are only needed during
2829   // the concurrent marking phase.
2830   if (!concurrent()) return;
2831
2832   // (2) If marking has been aborted for Full GC, then we also abort.
2833   if (_cm->has_aborted()) {
2834     set_has_aborted();
2835     return;
2836   }
2837
2838   double curr_time_ms = os::elapsedVTime() * 1000.0;
2839
2840   // (3) We check whether we should yield. If we have to, then we abort.
2841   if (SuspendibleThreadSet::should_yield()) {
2842     // We should yield. To do this we abort the task. The caller is
2843     // responsible for yielding.
2844     set_has_aborted();
2845     return;
2846   }
2847
2848   // (4) We check whether we've reached our time quota. If we have,
2849   // then we abort.
2850   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2851   if (elapsed_time_ms > _time_target_ms) {
2852     set_has_aborted();
2853     _has_timed_out = true;
2854     return;
2855   }
2856
2857   // (5) Finally, we check whether there are enough completed SATB
2858   // buffers available for processing. If there are, we abort.
2859 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2860 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2861 // we do need to process SATB buffers, we'll abort and restart 2862 // the marking task to do so 2863 set_has_aborted(); 2864 return; 2865 } 2866 } 2867 2868 void CMTask::recalculate_limits() { 2869 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2870 _words_scanned_limit = _real_words_scanned_limit; 2871 2872 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2873 _refs_reached_limit = _real_refs_reached_limit; 2874 } 2875 2876 void CMTask::decrease_limits() { 2877 // This is called when we believe that we're going to do an infrequent 2878 // operation which will increase the per byte scanned cost (i.e. move 2879 // entries to/from the global stack). It basically tries to decrease the 2880 // scanning limit so that the clock is called earlier. 2881 2882 _words_scanned_limit = _real_words_scanned_limit - 2883 3 * words_scanned_period / 4; 2884 _refs_reached_limit = _real_refs_reached_limit - 2885 3 * refs_reached_period / 4; 2886 } 2887 2888 void CMTask::move_entries_to_global_stack() { 2889 // local array where we'll store the entries that will be popped 2890 // from the local queue 2891 oop buffer[global_stack_transfer_size]; 2892 2893 int n = 0; 2894 oop obj; 2895 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 2896 buffer[n] = obj; 2897 ++n; 2898 } 2899 2900 if (n > 0) { 2901 // we popped at least one entry from the local queue 2902 2903 if (!_cm->mark_stack_push(buffer, n)) { 2904 set_has_aborted(); 2905 } 2906 } 2907 2908 // this operation was quite expensive, so decrease the limits 2909 decrease_limits(); 2910 } 2911 2912 void CMTask::get_entries_from_global_stack() { 2913 // local array where we'll store the entries that will be popped 2914 // from the global stack. 2915 oop buffer[global_stack_transfer_size]; 2916 int n; 2917 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 2918 assert(n <= global_stack_transfer_size, 2919 "we should not pop more than the given limit"); 2920 if (n > 0) { 2921 // yes, we did actually pop at least one entry 2922 for (int i = 0; i < n; ++i) { 2923 bool success = _task_queue->push(buffer[i]); 2924 // We only call this when the local queue is empty or under a 2925 // given target limit. So, we do not expect this push to fail. 2926 assert(success, "invariant"); 2927 } 2928 } 2929 2930 // this operation was quite expensive, so decrease the limits 2931 decrease_limits(); 2932 } 2933 2934 void CMTask::drain_local_queue(bool partially) { 2935 if (has_aborted()) return; 2936 2937 // Decide what the target size is, depending whether we're going to 2938 // drain it partially (so that other tasks can steal if they run out 2939 // of things to do) or totally (at the very end). 
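  // (GCDrainStackTargetSize, capped at a third of the queue's capacity,
  // when draining partially; zero when draining totally.)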
2940   size_t target_size;
2941   if (partially) {
2942     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2943   } else {
2944     target_size = 0;
2945   }
2946
2947   if (_task_queue->size() > target_size) {
2948     oop obj;
2949     bool ret = _task_queue->pop_local(obj);
2950     while (ret) {
2951       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
2952       assert(!_g1h->is_on_master_free_list(
2953                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2954
2955       scan_object(obj);
2956
2957       if (_task_queue->size() <= target_size || has_aborted()) {
2958         ret = false;
2959       } else {
2960         ret = _task_queue->pop_local(obj);
2961       }
2962     }
2963   }
2964 }
2965
2966 void CMTask::drain_global_stack(bool partially) {
2967   if (has_aborted()) return;
2968
2969   // We have a policy to drain the local queue before we attempt to
2970   // drain the global stack.
2971   assert(partially || _task_queue->size() == 0, "invariant");
2972
2973   // Decide what the target size is, depending whether we're going to
2974   // drain it partially (so that other tasks can steal if they run out
2975   // of things to do) or totally (at the very end). Notice that,
2976   // because we move entries from the global stack in chunks or
2977   // because another task might be doing the same, we might in fact
2978   // drop below the target. But, this is not a problem.
2979   size_t target_size;
2980   if (partially) {
2981     target_size = _cm->partial_mark_stack_size_target();
2982   } else {
2983     target_size = 0;
2984   }
2985
2986   if (_cm->mark_stack_size() > target_size) {
2987     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2988       get_entries_from_global_stack();
2989       drain_local_queue(partially);
2990     }
2991   }
2992 }
2993
2994 // SATB Queue has several assumptions on whether to call the par or
2995 // non-par versions of the methods. This is why some of the code is
2996 // replicated. We should really get rid of the single-threaded version
2997 // of the code to simplify things.
2998 void CMTask::drain_satb_buffers() {
2999   if (has_aborted()) return;
3000
3001   // We set this so that the regular clock knows that we're in the
3002   // middle of draining buffers and doesn't set the abort flag when it
3003   // notices that SATB buffers are available for draining. It'd be
3004   // very counterproductive if it did that. :-)
3005   _draining_satb_buffers = true;
3006
3007   CMSATBBufferClosure satb_cl(this, _g1h);
3008   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3009
3010   // This keeps claiming and applying the closure to completed buffers
3011   // until we run out of buffers or we need to abort.
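  // (apply_closure_to_completed_buffer() returns false once no completed
  // buffer is available, ending the loop; the regular_clock_call() in the
  // loop body may set has_aborted() in between buffers.)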
3012 while (!has_aborted() && 3013 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 3014 regular_clock_call(); 3015 } 3016 3017 _draining_satb_buffers = false; 3018 3019 assert(has_aborted() || 3020 concurrent() || 3021 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3022 3023 // again, this was a potentially expensive operation, decrease the 3024 // limits to get the regular clock call early 3025 decrease_limits(); 3026 } 3027 3028 void CMTask::print_stats() { 3029 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 3030 _worker_id, _calls); 3031 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3032 _elapsed_time_ms, _termination_time_ms); 3033 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3034 _step_times_ms.num(), _step_times_ms.avg(), 3035 _step_times_ms.sd()); 3036 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 3037 _step_times_ms.maximum(), _step_times_ms.sum()); 3038 } 3039 3040 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) { 3041 return _task_queues->steal(worker_id, hash_seed, obj); 3042 } 3043 3044 /***************************************************************************** 3045 3046 The do_marking_step(time_target_ms, ...) method is the building 3047 block of the parallel marking framework. It can be called in parallel 3048 with other invocations of do_marking_step() on different tasks 3049 (but only one per task, obviously) and concurrently with the 3050 mutator threads, or during remark, hence it eliminates the need 3051 for two versions of the code. When called during remark, it will 3052 pick up from where the task left off during the concurrent marking 3053 phase. Interestingly, tasks are also claimable during evacuation 3054 pauses too, since do_marking_step() ensures that it aborts before 3055 it needs to yield. 3056 3057 The data structures that it uses to do marking work are the 3058 following: 3059 3060 (1) Marking Bitmap. If there are gray objects that appear only 3061 on the bitmap (this happens either when dealing with an overflow 3062 or when the initial marking phase has simply marked the roots 3063 and didn't push them on the stack), then tasks claim heap 3064 regions whose bitmap they then scan to find gray objects. A 3065 global finger indicates where the end of the last claimed region 3066 is. A local finger indicates how far into the region a task has 3067 scanned. The two fingers are used to determine how to gray an 3068 object (i.e. whether simply marking it is OK, as it will be 3069 visited by a task in the future, or whether it needs to be also 3070 pushed on a stack). 3071 3072 (2) Local Queue. The local queue of the task which is accessed 3073 reasonably efficiently by the task. Other tasks can steal from 3074 it when they run out of work. Throughout the marking phase, a 3075 task attempts to keep its local queue short but not totally 3076 empty, so that entries are available for stealing by other 3077 tasks. Only when there is no more work, a task will totally 3078 drain its local queue. 3079 3080 (3) Global Mark Stack. This handles local queue overflow. During 3081 marking only sets of entries are moved between it and the local 3082 queues, as access to it requires a mutex and more fine-grain 3083 interaction with it which might cause contention. If it 3084 overflows, then the marking phase should restart and iterate 3085 over the bitmap to identify gray objects. 
Throughout the marking
3086    phase, tasks attempt to keep the global mark stack at a small
3087    length but not totally empty, so that entries are available for
3088    popping by other tasks. Only when there is no more work, tasks
3089    will totally drain the global mark stack.
3090
3091    (4) SATB Buffer Queue. This is where completed SATB buffers are
3092    made available. Buffers are regularly removed from this queue
3093    and scanned for roots, so that the queue doesn't get too
3094    long. During remark, all completed buffers are processed, as
3095    well as the filled in parts of any uncompleted buffers.
3096
3097    The do_marking_step() method tries to abort when the time target
3098    has been reached. There are a few other cases when the
3099    do_marking_step() method also aborts:
3100
3101    (1) When the marking phase has been aborted (after a Full GC).
3102
3103    (2) When a global overflow (on the global stack) has been
3104    triggered. Before the task aborts, it will actually sync up with
3105    the other tasks to ensure that all the marking data structures
3106    (local queues, stacks, fingers etc.) are re-initialized so that
3107    when do_marking_step() completes, the marking phase can
3108    immediately restart.
3109
3110    (3) When enough completed SATB buffers are available. The
3111    do_marking_step() method only tries to drain SATB buffers right
3112    at the beginning. So, if enough buffers are available, the
3113    marking step aborts and the SATB buffers are processed at
3114    the beginning of the next invocation.
3115
3116    (4) To yield. When we have to yield, we abort and yield
3117    right at the end of do_marking_step(). This saves us from a lot
3118    of hassle as, by yielding, we might allow a Full GC. If this
3119    happens then objects will be compacted underneath our feet, the
3120    heap might shrink, etc. We save checking for this by just
3121    aborting and doing the yield right at the end.
3122
3123    From the above it follows that the do_marking_step() method should
3124    be called in a loop (or, otherwise, regularly) until it completes.
3125
3126    If a marking step completes without its has_aborted() flag being
3127    true, it means it has completed the current marking phase (and
3128    also all other marking tasks have done so and have all synced up).
3129
3130    A method called regular_clock_call() is invoked "regularly" (in
3131    sub-millisecond intervals) throughout marking. It is this clock
3132    method that checks all the abort conditions which were mentioned
3133    above and decides when the task should abort. A work-based scheme
3134    is used to trigger this clock method: when the number of object
3135    words the marking phase has scanned or the number of references
3136    the marking phase has visited reach a given limit. Additional
3137    invocations to the clock method have been planted in a few other
3138    strategic places too. The initial reason for the clock method was
3139    to avoid calling vtime too regularly, as it is quite expensive. So,
3140    once it was in place, it was natural to piggy-back all the other
3141    conditions on it too and not constantly check them throughout the
3142    code.
3143
3144    If do_termination is true then do_marking_step will enter its
3145    termination protocol.
3146
3147    The value of is_serial must be true when do_marking_step is being
3148    called serially (i.e. by the VMThread) and do_marking_step should
3149    skip any synchronization in the termination and overflow code.
3150    Examples include the serial remark code and the serial reference
    processing closures.
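   For instance, the serial drain closure earlier in this file calls
   do_marking_step(1000000000.0, true /* do_termination */,
   true /* is_serial */) in a loop from the VMThread during serial
   reference processing at remark.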
    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-millisecond intervals) throughout marking. It is this clock
    method that checks all the abort conditions which were mentioned
    above and decides when the task should abort. A work-based scheme
    is used to trigger this clock method: it is invoked when the
    number of object words the marking phase has scanned or the
    number of references the marking phase has visited reaches a
    given limit. Additional invocations of the clock method have been
    planted in a few other strategic places too. The initial reason
    for the clock method was to avoid calling vtime too regularly, as
    it is quite expensive. So, once it was in place, it was natural
    to piggy-back all the other conditions on it too and not
    constantly check them throughout the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // Set up the variables that are used in the work-based scheme to
  // call the regular clock method.
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // Clear all flags.
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
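  // A note on the drain calls below: a "partial" drain (boolean
  // argument == true) stops while entries are still left, so that
  // other tasks can keep stealing from the local queue and popping
  // from the global stack; a total drain (false) empties them
  // completely.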
  // ...then partially drain the local queue and the global stack.
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure.
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object.
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.
    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one.
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
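  // A note on the protocol below: roughly speaking,
  // offer_termination() only returns true once every task has
  // offered termination, i.e. once marking work is globally
  // exhausted; it returns false early when this task's
  // should_exit_termination() asks it to leave the protocol (for
  // example because new work or an abort request has shown up).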
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // Let's allow task 0 to do this.
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task.
      // Its caller will restart it and we can hopefully find more
      // things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context.
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");
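  // Seed the step-time diff sequence (read by do_marking_step() via
  // the predictor's get_new_prediction()) so that the very first
  // prediction works from a non-empty history.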
  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT     " %-4s"
#define G1PPRL_TYPE_H_FORMAT   " %4s"
#define G1PPRL_BYTE_FORMAT     " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT   " %9s"
#define G1PPRL_DOUBLE_FORMAT   " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

// It takes as a parameter a pointer to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}
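// For example (assuming, purely for illustration, a 1MB region size
// and a humongous series whose "starts humongous" region recorded
// 2.5MB of used bytes): the first region reports 1MB, the second
// 1MB, and the last 0.5MB, after which the corresponding _hum_*
// field has been counted down to zero.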
bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usages to remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          perc(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          perc(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          perc(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}