/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
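  // Each bit of the map covers HeapWordSize << _shifter bytes of heap, i.e.
  // one bit per possible object start. For example, assuming the default
  // 8-byte object alignment (_shifter == 0), an addr of 0x1004 is rounded
  // up to 0x1008 before being converted into a bit offset below.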
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  assert(limit != NULL, "limit must not be NULL");
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;  // The closure may yield during iteration. If yielded, abort the iteration.
public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer _hrclaimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::clearRange(MemRegion mr) {
  // Clamp the range to the portion of the heap this bitmap covers. (The
  // result of intersection() must be assigned back; calling it for its
  // side effects alone would be a no-op.)
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up the existing stack until we have managed to
  // get the doubled capacity that we desired.
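  // Growth is geometric but capped: with hypothetical sizes, a 4M-entry
  // stack that overflowed is doubled to 8M entries here, then to 16M after
  // a later overflow, and so on until new_capacity reaches MarkStackSizeMax.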
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with the old stack
    _virtual_space.release();
    // Reinitialize virtual space for the new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    // Failed to double capacity, continue with the current stack.
    log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                  _capacity / K, new_capacity / K);
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
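  // Claiming protocol: racily read _next_survivor first; only if it looks
  // non-NULL do we take RootRegionScan_lock and re-read it before advancing
  // the cursor, so each survivor region is handed to exactly one worker.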
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _concurrent_phase_started(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor          = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
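    // Only combinations the user actually specified on the command line are
    // checked here; e.g. -XX:MarkStackSize=0, or a MarkStackSize above an
    // explicitly supplied MarkStackSizeMax, is rejected with a warning.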
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for a Full GC or an evacuation pause to occur while it
 * is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);

      log_info(gc)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::register_concurrent_phase_start(const char* title) {
  assert(!_concurrent_phase_started, "Sanity");
  _concurrent_phase_started = true;
  _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
}

void ConcurrentMark::register_concurrent_phase_end() {
  if (_concurrent_phase_started) {
    _concurrent_phase_started = false;
    _g1h->gc_timer_cm()->register_gc_concurrent_end();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    log_develop_trace(gc)("Remark led to restart for overflow.");

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
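      // Each worker has been counting into its own card bitmap and
      // per-region marked-bytes array; this merges those per-worker
      // structures into the global counting data (the region and card
      // liveness bitmaps and the regions' next_marked_bytes).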
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
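    // The loop below then walks from marked object to marked object: for
    // each one it sets the card bitmap bits for the cards the object spans
    // and adds its size to marked_bytes. Illustrative numbers, assuming
    // 512-byte cards: a 0x500-byte object whose first word maps to card
    // index 9 spans cards 9..11, and because its end is not card-aligned,
    // end_idx is bumped so that card 11 is included in the range that is set.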
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // This will happen if we are handling a humongous object that spans
      // several heap regions.
      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.
class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;     // Region BM to be verified
  BitMap* _card_bm;       // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    if (exp_marked_bytes > act_marked_bytes) {
      if (hr->is_starts_humongous()) {
        // For starts_humongous regions, the size of the whole object will be
        // in exp_marked_bytes.
        HeapRegion* region = hr;
        int num_regions;
        for (num_regions = 0; region != NULL; num_regions++) {
          region = _g1h->next_region_in_humongous(region);
        }
        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
          failures += 1;
        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
          failures += 1;
        }
      } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting some objects during the actual marking.
        failures += 1;
      }
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;

  HeapRegionClaimer _hrclaimer;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit, in the region
// liveness bitmap, for each region containing live data.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
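      // Illustrative case, assuming 512-byte cards: if top lies in the
      // middle of a card, that card is only partially covered by
      // [ntams, top) but still holds live data, so end_idx is incremented
      // below to include it; if top is exactly card-aligned, no adjustment
      // is needed.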
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             end_idx, _card_bm->size());
      assert(start_idx < _card_bm->size(),
             "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             start_idx, _card_bm->size());

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
  }
};

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  uint old_regions_removed() const { return _old_regions_removed; }
  uint humongous_regions_removed() const { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterated over the global cleanup list at the end of
      // cleanup to do this printing, we could not guarantee that we only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
1604 if (has_aborted()) {
1605 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1606 return;
1607 }
1608
1609 g1h->verifier()->verify_region_sets_optional();
1610
1611 if (VerifyDuringGC) {
1612 HandleMark hm; // handle scope
1613 g1h->prepare_for_verify();
1614 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1615 }
1616 g1h->verifier()->check_bitmaps("Cleanup Start");
1617
1618 G1CollectorPolicy* g1p = g1h->g1_policy();
1619 g1p->record_concurrent_mark_cleanup_start();
1620
1621 double start = os::elapsedTime();
1622
1623 HeapRegionRemSet::reset_for_cleanup_tasks();
1624
1625 // Do counting once more with the world stopped for good measure.
1626 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1627
1628 g1h->workers()->run_task(&g1_par_count_task);
1629
1630 if (VerifyDuringGC) {
1631 // Verify that the counting data accumulated during marking matches
1632 // that calculated by walking the marking bitmap.
1633
1634 // Bitmaps to hold expected values
1635 BitMap expected_region_bm(_region_bm.size(), true);
1636 BitMap expected_card_bm(_card_bm.size(), true);
1637
1638 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1639 &_region_bm,
1640 &_card_bm,
1641 &expected_region_bm,
1642 &expected_card_bm);
1643
1644 g1h->workers()->run_task(&g1_par_verify_task);
1645
1646 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1647 }
1648
1649 size_t start_used_bytes = g1h->used();
1650 g1h->collector_state()->set_mark_in_progress(false);
1651
1652 double count_end = os::elapsedTime();
1653 double this_final_counting_time = (count_end - start);
1654 _total_counting_time += this_final_counting_time;
1655
1656 if (log_is_enabled(Trace, gc, liveness)) {
1657 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1658 _g1h->heap_region_iterate(&cl);
1659 }
1660
1661 // Install newly created mark bitmap as "prev".
1662 swapMarkBitMaps();
1663
1664 g1h->reset_gc_time_stamp();
1665
1666 uint n_workers = _g1h->workers()->active_workers();
1667
1668 // Note end of marking in all heap regions.
1669 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1670 g1h->workers()->run_task(&g1_par_note_end_task);
1671 g1h->check_gc_time_stamps();
1672
1673 if (!cleanup_list_is_empty()) {
1674 // The cleanup list is not empty, so we'll have to process it
1675 // concurrently. Notify anyone else that might be wanting free
1676 // regions that there will be more free regions coming soon.
1677 g1h->set_free_regions_coming();
1678 }
1679
1680 // The RSet scrubbing below must happen before the record_concurrent_mark_cleanup_end()
1681 // call below, since it affects the metric by which we sort the heap regions.
1682 if (G1ScrubRemSets) {
1683 double rs_scrub_start = os::elapsedTime();
1684 g1h->scrub_rem_set(&_region_bm, &_card_bm);
1685 _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1686 }
1687
1688 // This will also free any regions totally full of garbage objects,
1689 // and sort the regions.
1690 g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1691
1692 // Statistics.
1693 double end = os::elapsedTime();
1694 _cleanup_times.add((end - start) * 1000.0);
1695
1696 // Clean up will have freed any regions completely full of garbage.
1697 // Update the soft reference policy with the new heap occupancy.
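// update_heap_info_at_gc() caches the current heap capacity and usage;
// the soft reference clearing policy reads these cached values when
// deciding how aggressively soft references may be cleared.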
1698 Universe::update_heap_info_at_gc();
1699
1700 if (VerifyDuringGC) {
1701 HandleMark hm; // handle scope
1702 g1h->prepare_for_verify();
1703 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1704 }
1705
1706 g1h->verifier()->check_bitmaps("Cleanup End");
1707
1708 g1h->verifier()->verify_region_sets_optional();
1709
1710 // We need to make this be a "collection" so any collection pause that
1711 // races with it goes around and waits for completeCleanup to finish.
1712 g1h->increment_total_collections();
1713
1714 // Clean out dead classes and update Metaspace sizes.
1715 if (ClassUnloadingWithConcurrentMark) {
1716 ClassLoaderDataGraph::purge();
1717 }
1718 MetaspaceGC::compute_new_size();
1719
1720 // We reclaimed old regions so we should calculate the sizes to make
1721 // sure we update the old gen/space data.
1722 g1h->g1mm()->update_sizes();
1723 g1h->allocation_context_stats().update_after_mark();
1724
1725 g1h->trace_heap_after_concurrent_cycle();
1726 }
1727
1728 void ConcurrentMark::completeCleanup() {
1729 if (has_aborted()) return;
1730
1731 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1732
1733 _cleanup_list.verify_optional();
1734 FreeRegionList tmp_free_list("Tmp Free List");
1735
1736 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1737 "cleanup list has %u entries",
1738 _cleanup_list.length());
1739
1740 // No one else should be accessing the _cleanup_list at this point,
1741 // so it is not necessary to take any locks.
1742 while (!_cleanup_list.is_empty()) {
1743 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1744 assert(hr != NULL, "Got NULL from a non-empty list");
1745 hr->par_clear();
1746 tmp_free_list.add_ordered(hr);
1747
1748 // Instead of adding one region at a time to the secondary_free_list,
1749 // we accumulate them in the local list and move them a few at a
1750 // time. This also cuts down on the number of notify_all() calls
1751 // we do during this process. We'll also append the local list when
1752 // _cleanup_list is empty (which means we just removed the last
1753 // region from the _cleanup_list).
1754 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1755 _cleanup_list.is_empty()) {
1756 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1757 "appending %u entries to the secondary_free_list, "
1758 "cleanup list still has %u entries",
1759 tmp_free_list.length(),
1760 _cleanup_list.length());
1761
1762 {
1763 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1764 g1h->secondary_free_list_add(&tmp_free_list);
1765 SecondaryFreeList_lock->notify_all();
1766 }
1767 #ifndef PRODUCT
1768 if (G1StressConcRegionFreeing) {
1769 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1770 os::sleep(Thread::current(), (jlong) 1, false);
1771 }
1772 }
1773 #endif
1774 }
1775 }
1776 assert(tmp_free_list.is_empty(), "post-condition");
1777 }
1778
1779 // Supporting Object and Oop closures for reference discovery
1780 // and processing during marking
1781
1782 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1783 HeapWord* addr = (HeapWord*)obj;
1784 return addr != NULL &&
1785 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1786 }
1787
1788 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1789 // Uses the CMTask associated with a worker thread (for serial reference
1790 // processing the CMTask for worker 0 is used) to preserve (mark) and
1791 // trace referent objects.
1792 //
1793 // Using the CMTask and embedded local queues avoids having the worker
1794 // threads operating on the global mark stack. This reduces the risk
1795 // of overflowing the stack - which we would rather avoid at this late
1796 // state. Also using the tasks' local queues removes the potential
1797 // of the workers interfering with each other that could occur if
1798 // operating on the global stack.
1799
1800 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1801 ConcurrentMark* _cm;
1802 CMTask* _task;
1803 int _ref_counter_limit;
1804 int _ref_counter;
1805 bool _is_serial;
1806 public:
1807 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1808 _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1809 _is_serial(is_serial) {
1810 assert(_ref_counter_limit > 0, "sanity");
1811 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1812 _ref_counter = _ref_counter_limit;
1813 }
1814
1815 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1816 virtual void do_oop( oop* p) { do_oop_work(p); }
1817
1818 template <class T> void do_oop_work(T* p) {
1819 if (!_cm->has_overflown()) {
1820 oop obj = oopDesc::load_decode_heap_oop(p);
1821 _task->deal_with_reference(obj);
1822 _ref_counter--;
1823
1824 if (_ref_counter == 0) {
1825 // We have dealt with _ref_counter_limit references, pushing them
1826 // and objects reachable from them on to the local stack (and
1827 // possibly the global stack). Call CMTask::do_marking_step() to
1828 // process these entries.
1829 //
1830 // We call CMTask::do_marking_step() in a loop, which we'll exit if
1831 // there's nothing more to do (i.e. we're done with the entries that
1832 // were pushed as a result of the CMTask::deal_with_reference() calls
1833 // above) or we overflow.
1834 //
1835 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1836 // flag while there may still be some work to do. (See the comment at
1837 // the beginning of CMTask::do_marking_step() for those conditions -
1838 // one of which is reaching the specified time target.) It is only
1839 // when CMTask::do_marking_step() returns without setting the
1840 // has_aborted() flag that the marking step has completed.
1841 do {
1842 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1843 _task->do_marking_step(mark_step_duration_ms,
1844 false /* do_termination */,
1845 _is_serial);
1846 } while (_task->has_aborted() && !_cm->has_overflown());
1847 _ref_counter = _ref_counter_limit;
1848 }
1849 }
1850 }
1851 };
1852
1853 // 'Drain' oop closure used by both serial and parallel reference processing.
1854 // Uses the CMTask associated with a given worker thread (for serial
1855 // reference processing the CMTask for worker 0 is used). Calls the
1856 // do_marking_step routine, with an unbelievably large timeout value,
1857 // to drain the marking data structures of the remaining entries
1858 // added by the 'keep alive' oop closure above.
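// Passing an "unbelievably large" time target turns the normally
// time-sliced do_marking_step() into a run-to-completion drain: the
// do/while loop in do_void() below only exits once the local queue and
// global stack are empty, or marking has overflowed or been aborted.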
1859
1860 class G1CMDrainMarkingStackClosure: public VoidClosure {
1861 ConcurrentMark* _cm;
1862 CMTask* _task;
1863 bool _is_serial;
1864 public:
1865 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1866 _cm(cm), _task(task), _is_serial(is_serial) {
1867 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1868 }
1869
1870 void do_void() {
1871 do {
1872 // We call CMTask::do_marking_step() to completely drain the local
1873 // and global marking stacks of entries pushed by the 'keep alive'
1874 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1875 //
1876 // CMTask::do_marking_step() is called in a loop, which we'll exit
1877 // if there's nothing more to do (i.e. we've completely drained the
1878 // entries that were pushed as a result of applying the 'keep alive'
1879 // closure to the entries on the discovered ref lists) or we overflow
1880 // the global marking stack.
1881 //
1882 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1883 // flag while there may still be some work to do. (See the comment at
1884 // the beginning of CMTask::do_marking_step() for those conditions -
1885 // one of which is reaching the specified time target.) It is only
1886 // when CMTask::do_marking_step() returns without setting the
1887 // has_aborted() flag that the marking step has completed.
1888
1889 _task->do_marking_step(1000000000.0 /* something very large */,
1890 true /* do_termination */,
1891 _is_serial);
1892 } while (_task->has_aborted() && !_cm->has_overflown());
1893 }
1894 };
1895
1896 // Implementation of AbstractRefProcTaskExecutor for parallel
1897 // reference processing at the end of G1 concurrent marking
1898
1899 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1900 private:
1901 G1CollectedHeap* _g1h;
1902 ConcurrentMark* _cm;
1903 WorkGang* _workers;
1904 uint _active_workers;
1905
1906 public:
1907 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1908 ConcurrentMark* cm,
1909 WorkGang* workers,
1910 uint n_workers) :
1911 _g1h(g1h), _cm(cm),
1912 _workers(workers), _active_workers(n_workers) { }
1913
1914 // Executes the given task using concurrent marking worker threads.
1915 virtual void execute(ProcessTask& task);
1916 virtual void execute(EnqueueTask& task);
1917 };
1918
1919 class G1CMRefProcTaskProxy: public AbstractGangTask {
1920 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1921 ProcessTask& _proc_task;
1922 G1CollectedHeap* _g1h;
1923 ConcurrentMark* _cm;
1924
1925 public:
1926 G1CMRefProcTaskProxy(ProcessTask& proc_task,
1927 G1CollectedHeap* g1h,
1928 ConcurrentMark* cm) :
1929 AbstractGangTask("Process reference objects in parallel"),
1930 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1931 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1932 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1933 }
1934
1935 virtual void work(uint worker_id) {
1936 ResourceMark rm;
1937 HandleMark hm;
1938 CMTask* task = _cm->task(worker_id);
1939 G1CMIsAliveClosure g1_is_alive(_g1h);
1940 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1941 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1942
1943 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1944 }
1945 };
1946
1947 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1948 assert(_workers != NULL, "Need parallel worker threads.");
1949 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1950
1951 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1952
1953 // We need to reset the concurrency level before each
1954 // proxy task execution, so that the termination protocol
1955 // and overflow handling in CMTask::do_marking_step() know
1956 // how many workers to wait for.
1957 _cm->set_concurrency(_active_workers);
1958 _workers->run_task(&proc_task_proxy);
1959 }
1960
1961 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1962 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1963 EnqueueTask& _enq_task;
1964
1965 public:
1966 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1967 AbstractGangTask("Enqueue reference objects in parallel"),
1968 _enq_task(enq_task) { }
1969
1970 virtual void work(uint worker_id) {
1971 _enq_task.work(worker_id);
1972 }
1973 };
1974
1975 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1976 assert(_workers != NULL, "Need parallel worker threads.");
1977 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1978
1979 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1980
1981 // Not strictly necessary but...
1982 //
1983 // We need to reset the concurrency level before each
1984 // proxy task execution, so that the termination protocol
1985 // and overflow handling in CMTask::do_marking_step() know
1986 // how many workers to wait for.
1987 _cm->set_concurrency(_active_workers);
1988 _workers->run_task(&enq_task_proxy);
1989 }
1990
1991 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
1992 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
1993 }
1994
1995 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1996 if (has_overflown()) {
1997 // Skip processing the discovered references if we have
1998 // overflown the global marking stack. Reference objects
1999 // only get discovered once so it is OK to not
2000 // de-populate the discovered reference lists. We could have done so,
2001 // but the only benefit would be that, when marking restarts,
2002 // fewer reference objects are discovered.
2003 return;
2004 }
2005
2006 ResourceMark rm;
2007 HandleMark hm;
2008
2009 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2010
2011 // Is alive closure.
2012 G1CMIsAliveClosure g1_is_alive(g1h);
2013
2014 // Inner scope to exclude the cleaning of the string and symbol
2015 // tables from the displayed time.
2016 {
2017 GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
2018
2019 ReferenceProcessor* rp = g1h->ref_processor_cm();
2020
2021 // See the comment in G1CollectedHeap::ref_processing_init()
2022 // about how reference processing currently works in G1.
2023
2024 // Set the soft reference policy
2025 rp->setup_policy(clear_all_soft_refs);
2026 assert(_markStack.isEmpty(), "mark stack should be empty");
2027
2028 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2029 // in serial reference processing. Note these closures are also
2030 // used for serially processing (by the current thread) the
2031 // JNI references during parallel reference processing.
2032 //
2033 // These closures do not need to synchronize with the worker
2034 // threads involved in parallel reference processing as these
2035 // instances are executed serially by the current thread (i.e.
2036 // reference processing is not multi-threaded and is thus
2037 // performed by the current thread instead of a gang worker).
2038 //
2039 // The gang tasks involved in parallel reference processing create
2040 // their own instances of these closures, which do their own
2041 // synchronization among themselves.
2042 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2043 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2044
2045 // We need at least one active thread. If reference processing
2046 // is not multi-threaded we use the current (VMThread) thread,
2047 // otherwise we use the work gang from the G1CollectedHeap and
2048 // we utilize all the worker threads we can.
2049 bool processing_is_mt = rp->processing_is_mt();
2050 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2051 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2052
2053 // Parallel processing task executor.
2054 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2055 g1h->workers(), active_workers);
2056 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2057
2058 // Set the concurrency level. The phase was already set prior to
2059 // executing the remark task.
2060 set_concurrency(active_workers);
2061
2062 // Set the degree of MT processing here. If the discovery was done MT,
2063 // the number of threads involved during discovery could differ from
2064 // the number of active workers. This is OK as long as the discovered
2065 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2066 rp->set_active_mt_degree(active_workers);
2067
2068 // Process the weak references.
2069 const ReferenceProcessorStats& stats =
2070 rp->process_discovered_references(&g1_is_alive,
2071 &g1_keep_alive,
2072 &g1_drain_mark_stack,
2073 executor,
2074 g1h->gc_timer_cm());
2075 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2076
2077 // The do_oop work routines of the keep_alive and drain_marking_stack
2078 // oop closures will set the has_overflown flag if we overflow the
2079 // global marking stack.
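// So at this point the global mark stack is either empty or flagged as
// having overflowed; the assert below checks exactly that.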
2080 2081 assert(_markStack.overflow() || _markStack.isEmpty(), 2082 "mark stack should be empty (unless it overflowed)"); 2083 2084 if (_markStack.overflow()) { 2085 // This should have been done already when we tried to push an 2086 // entry on to the global mark stack. But let's do it again. 2087 set_has_overflown(); 2088 } 2089 2090 assert(rp->num_q() == active_workers, "why not"); 2091 2092 rp->enqueue_discovered_references(executor); 2093 2094 rp->verify_no_references_recorded(); 2095 assert(!rp->discovery_enabled(), "Post condition"); 2096 } 2097 2098 if (has_overflown()) { 2099 // We can not trust g1_is_alive if the marking stack overflowed 2100 return; 2101 } 2102 2103 assert(_markStack.isEmpty(), "Marking should have completed"); 2104 2105 // Unload Klasses, String, Symbols, Code Cache, etc. 2106 { 2107 GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm()); 2108 2109 if (ClassUnloadingWithConcurrentMark) { 2110 bool purged_classes; 2111 2112 { 2113 GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm()); 2114 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2115 } 2116 2117 { 2118 GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm()); 2119 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2120 } 2121 } 2122 2123 if (G1StringDedup::is_enabled()) { 2124 GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm()); 2125 G1StringDedup::unlink(&g1_is_alive); 2126 } 2127 } 2128 } 2129 2130 void ConcurrentMark::swapMarkBitMaps() { 2131 CMBitMapRO* temp = _prevMarkBitMap; 2132 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2133 _nextMarkBitMap = (CMBitMap*) temp; 2134 } 2135 2136 // Closure for marking entries in SATB buffers. 2137 class CMSATBBufferClosure : public SATBBufferClosure { 2138 private: 2139 CMTask* _task; 2140 G1CollectedHeap* _g1h; 2141 2142 // This is very similar to CMTask::deal_with_reference, but with 2143 // more relaxed requirements for the argument, so this must be more 2144 // circumspect about treating the argument as an object. 2145 void do_entry(void* entry) const { 2146 _task->increment_refs_reached(); 2147 HeapRegion* hr = _g1h->heap_region_containing(entry); 2148 if (entry < hr->next_top_at_mark_start()) { 2149 // Until we get here, we don't know whether entry refers to a valid 2150 // object; it could instead have been a stale reference. 
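// A stale entry would point into a region that has been freed (and possibly
// reallocated) since the entry was enqueued; freeing a region resets its
// NTAMS to bottom, so a stale entry should never pass the check above. The
// assert below validates that the entry really is a parseable object.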
2151 oop obj = static_cast<oop>(entry);
2152 assert(obj->is_oop(true /* ignore mark word */),
2153 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2154 _task->make_reference_grey(obj, hr);
2155 }
2156 }
2157
2158 public:
2159 CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2160 : _task(task), _g1h(g1h) { }
2161
2162 virtual void do_buffer(void** buffer, size_t size) {
2163 for (size_t i = 0; i < size; ++i) {
2164 do_entry(buffer[i]);
2165 }
2166 }
2167 };
2168
2169 class G1RemarkThreadsClosure : public ThreadClosure {
2170 CMSATBBufferClosure _cm_satb_cl;
2171 G1CMOopClosure _cm_cl;
2172 MarkingCodeBlobClosure _code_cl;
2173 int _thread_parity;
2174
2175 public:
2176 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2177 _cm_satb_cl(task, g1h),
2178 _cm_cl(g1h, g1h->concurrent_mark(), task),
2179 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2180 _thread_parity(Threads::thread_claim_parity()) {}
2181
2182 void do_thread(Thread* thread) {
2183 if (thread->is_Java_thread()) {
2184 if (thread->claim_oops_do(true, _thread_parity)) {
2185 JavaThread* jt = (JavaThread*)thread;
2186
2187 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2188 // however, oops reachable from nmethods have very complex lifecycles:
2189 // * Alive if on the stack of an executing method
2190 // * Weakly reachable otherwise
2191 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2192 // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2193 jt->nmethods_do(&_code_cl);
2194
2195 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2196 }
2197 } else if (thread->is_VM_thread()) {
2198 if (thread->claim_oops_do(true, _thread_parity)) {
2199 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2200 }
2201 }
2202 }
2203 };
2204
2205 class CMRemarkTask: public AbstractGangTask {
2206 private:
2207 ConcurrentMark* _cm;
2208 public:
2209 void work(uint worker_id) {
2210 // Since all available tasks are actually started, we should
2211 // only proceed if we're supposed to be active.
2212 if (worker_id < _cm->active_tasks()) {
2213 CMTask* task = _cm->task(worker_id);
2214 task->record_start_time();
2215 {
2216 ResourceMark rm;
2217 HandleMark hm;
2218
2219 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2220 Threads::threads_do(&threads_f);
2221 }
2222
2223 do {
2224 task->do_marking_step(1000000000.0 /* something very large */,
2225 true /* do_termination */,
2226 false /* is_serial */);
2227 } while (task->has_aborted() && !_cm->has_overflown());
2228 // If we overflow, then we do not want to restart. We instead
2229 // want to abort remark and do concurrent marking again.
2230 task->record_end_time();
2231 }
2232 }
2233
2234 CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2235 AbstractGangTask("Par Remark"), _cm(cm) {
2236 _cm->terminator()->reset_for_reuse(active_workers);
2237 }
2238 };
2239
2240 void ConcurrentMark::checkpointRootsFinalWork() {
2241 ResourceMark rm;
2242 HandleMark hm;
2243 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2244
2245 GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2246
2247 g1h->ensure_parsability(false);
2248
2249 // This is remark, so we'll use up all active threads.
2250 uint active_workers = g1h->workers()->active_workers();
2251 set_concurrency_and_phase(active_workers, false /* concurrent */);
2252 // Leave _parallel_marking_threads at its
2253 // value originally calculated in the ConcurrentMark
2254 // constructor and pass values of the active workers
2255 // through the gang in the task.
2256
2257 {
2258 StrongRootsScope srs(active_workers);
2259
2260 CMRemarkTask remarkTask(this, active_workers);
2261 // We will start all available threads, even if we decide that the
2262 // active_workers will be fewer. The extra ones will just bail out
2263 // immediately.
2264 g1h->workers()->run_task(&remarkTask);
2265 }
2266
2267 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2268 guarantee(has_overflown() ||
2269 satb_mq_set.completed_buffers_num() == 0,
2270 "Invariant: has_overflown = %s, num buffers = %d",
2271 BOOL_TO_STR(has_overflown()),
2272 satb_mq_set.completed_buffers_num());
2273
2274 print_stats();
2275 }
2276
2277 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2278 // Note we are overriding the read-only view of the prev map here, via
2279 // the cast.
2280 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2281 }
2282
2283 HeapRegion*
2284 ConcurrentMark::claim_region(uint worker_id) {
2285 // "checkpoint" the finger
2286 HeapWord* finger = _finger;
2287
2288 // _heap_end will not change underneath our feet; it only changes at
2289 // yield points.
2290 while (finger < _heap_end) {
2291 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2292
2293 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2294
2295 // heap_region_containing() above may return NULL as we always claim regions
2296 // up to the end of the heap. In this case, just jump to the next region.
2297 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2298
2299 // Is the gap between reading the finger and doing the CAS too long?
2300 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2301 if (res == finger && curr_region != NULL) {
2302 // we succeeded
2303 HeapWord* bottom = curr_region->bottom();
2304 HeapWord* limit = curr_region->next_top_at_mark_start();
2305
2306 // Notice that _finger == end cannot be guaranteed here since
2307 // someone else might have moved the finger even further.
2308 assert(_finger >= end, "the finger should have moved forward");
2309
2310 if (limit > bottom) {
2311 return curr_region;
2312 } else {
2313 assert(limit == bottom,
2314 "the region limit should be at bottom");
2315 // we return NULL and the caller should try calling
2316 // claim_region() again.
2317 return NULL; 2318 } 2319 } else { 2320 assert(_finger > finger, "the finger should have moved forward"); 2321 // read it again 2322 finger = _finger; 2323 } 2324 } 2325 2326 return NULL; 2327 } 2328 2329 #ifndef PRODUCT 2330 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2331 private: 2332 G1CollectedHeap* _g1h; 2333 const char* _phase; 2334 int _info; 2335 2336 public: 2337 VerifyNoCSetOops(const char* phase, int info = -1) : 2338 _g1h(G1CollectedHeap::heap()), 2339 _phase(phase), 2340 _info(info) 2341 { } 2342 2343 void operator()(oop obj) const { 2344 guarantee(obj->is_oop(), 2345 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2346 p2i(obj), _phase, _info); 2347 guarantee(!_g1h->obj_in_cs(obj), 2348 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2349 p2i(obj), _phase, _info); 2350 } 2351 }; 2352 2353 void ConcurrentMark::verify_no_cset_oops() { 2354 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2355 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2356 return; 2357 } 2358 2359 // Verify entries on the global mark stack 2360 _markStack.iterate(VerifyNoCSetOops("Stack")); 2361 2362 // Verify entries on the task queues 2363 for (uint i = 0; i < _max_worker_id; ++i) { 2364 CMTaskQueue* queue = _task_queues->queue(i); 2365 queue->iterate(VerifyNoCSetOops("Queue", i)); 2366 } 2367 2368 // Verify the global finger 2369 HeapWord* global_finger = finger(); 2370 if (global_finger != NULL && global_finger < _heap_end) { 2371 // Since we always iterate over all regions, we might get a NULL HeapRegion 2372 // here. 2373 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2374 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2375 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2376 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2377 } 2378 2379 // Verify the task fingers 2380 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2381 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2382 CMTask* task = _tasks[i]; 2383 HeapWord* task_finger = task->finger(); 2384 if (task_finger != NULL && task_finger < _heap_end) { 2385 // See above note on the global finger verification. 2386 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2387 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2388 !task_hr->in_collection_set(), 2389 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2390 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2391 } 2392 } 2393 } 2394 #endif // PRODUCT 2395 2396 // Aggregate the counting data that was constructed concurrently 2397 // with marking. 
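// Liveness counting data is accumulated privately per worker during marking:
// each worker owns a card bitmap (one bit per card) and a marked-bytes array
// (one slot per region). The closure below ORs every worker's card bits into
// the global card bitmap and sums the per-region byte counts into each
// region's marked-bytes total. As an illustration, with the usual 512-byte
// cards an object spanning 1 KB of heap covers two or three consecutive
// cards, depending on its alignment, and all of them end up set in the
// global card bitmap.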
2398 class AggregateCountDataHRClosure: public HeapRegionClosure {
2399 G1CollectedHeap* _g1h;
2400 ConcurrentMark* _cm;
2401 CardTableModRefBS* _ct_bs;
2402 BitMap* _cm_card_bm;
2403 uint _max_worker_id;
2404
2405 public:
2406 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2407 BitMap* cm_card_bm,
2408 uint max_worker_id) :
2409 _g1h(g1h), _cm(g1h->concurrent_mark()),
2410 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2411 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2412
2413 bool doHeapRegion(HeapRegion* hr) {
2414 HeapWord* start = hr->bottom();
2415 HeapWord* limit = hr->next_top_at_mark_start();
2416 HeapWord* end = hr->end();
2417
2418 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2419 "Preconditions not met - "
2420 "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2421 "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2422 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2423
2424 assert(hr->next_marked_bytes() == 0, "Precondition");
2425
2426 if (start == limit) {
2427 // NTAMS of this region has not been set so nothing to do.
2428 return false;
2429 }
2430
2431 // 'start' should be in the heap.
2432 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2433 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2434 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2435
2436 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2437 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2438 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2439
2440 // If ntams is not card aligned then we bump the card bitmap index
2441 // for limit so that we get all the cards spanned by
2442 // the object ending at ntams.
2443 // Note: if this is the last region in the heap then ntams
2444 // could be actually just beyond the end of the heap;
2445 // limit_idx will then correspond to a (non-existent) card
2446 // that is also outside the heap.
2447 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2448 limit_idx += 1;
2449 }
2450
2451 assert(limit_idx <= end_idx, "or else use atomics");
2452
2453 // Aggregate the "stripe" in the count data associated with hr.
2454 uint hrm_index = hr->hrm_index();
2455 size_t marked_bytes = 0;
2456
2457 for (uint i = 0; i < _max_worker_id; i += 1) {
2458 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2459 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2460
2461 // Fetch the marked_bytes in this region for task i and
2462 // add it to the running total for this region.
2463 marked_bytes += marked_bytes_array[hrm_index];
2464
2465 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2466 // into the global card bitmap.
2467 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2468
2469 while (scan_idx < limit_idx) {
2470 assert(task_card_bm->at(scan_idx) == true, "should be");
2471 _cm_card_bm->set_bit(scan_idx);
2472 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2473
2474 // BitMap::get_next_one_offset() can handle the case when
2475 // its left_offset parameter is greater than its right_offset
2476 // parameter. It does, however, have an early exit if
2477 // left_offset == right_offset. So let's limit the value
2478 // passed in for left offset here.
2479 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2480 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2481 } 2482 } 2483 2484 // Update the marked bytes for this region. 2485 hr->add_to_marked_bytes(marked_bytes); 2486 2487 // Next heap region 2488 return false; 2489 } 2490 }; 2491 2492 class G1AggregateCountDataTask: public AbstractGangTask { 2493 protected: 2494 G1CollectedHeap* _g1h; 2495 ConcurrentMark* _cm; 2496 BitMap* _cm_card_bm; 2497 uint _max_worker_id; 2498 uint _active_workers; 2499 HeapRegionClaimer _hrclaimer; 2500 2501 public: 2502 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2503 ConcurrentMark* cm, 2504 BitMap* cm_card_bm, 2505 uint max_worker_id, 2506 uint n_workers) : 2507 AbstractGangTask("Count Aggregation"), 2508 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2509 _max_worker_id(max_worker_id), 2510 _active_workers(n_workers), 2511 _hrclaimer(_active_workers) { 2512 } 2513 2514 void work(uint worker_id) { 2515 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2516 2517 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2518 } 2519 }; 2520 2521 2522 void ConcurrentMark::aggregate_count_data() { 2523 uint n_workers = _g1h->workers()->active_workers(); 2524 2525 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2526 _max_worker_id, n_workers); 2527 2528 _g1h->workers()->run_task(&g1_par_agg_task); 2529 } 2530 2531 // Clear the per-worker arrays used to store the per-region counting data 2532 void ConcurrentMark::clear_all_count_data() { 2533 // Clear the global card bitmap - it will be filled during 2534 // liveness count aggregation (during remark) and the 2535 // final counting task. 2536 _card_bm.clear(); 2537 2538 // Clear the global region bitmap - it will be filled as part 2539 // of the final counting task. 2540 _region_bm.clear(); 2541 2542 uint max_regions = _g1h->max_regions(); 2543 assert(_max_worker_id > 0, "uninitialized"); 2544 2545 for (uint i = 0; i < _max_worker_id; i += 1) { 2546 BitMap* task_card_bm = count_card_bitmap_for(i); 2547 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2548 2549 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2550 assert(marked_bytes_array != NULL, "uninitialized"); 2551 2552 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2553 task_card_bm->clear(); 2554 } 2555 } 2556 2557 void ConcurrentMark::print_stats() { 2558 if (!log_is_enabled(Debug, gc, stats)) { 2559 return; 2560 } 2561 log_debug(gc, stats)("---------------------------------------------------------------------"); 2562 for (size_t i = 0; i < _active_tasks; ++i) { 2563 _tasks[i]->print_stats(); 2564 log_debug(gc, stats)("---------------------------------------------------------------------"); 2565 } 2566 } 2567 2568 // abandon current marking iteration due to a Full GC 2569 void ConcurrentMark::abort() { 2570 if (!cmThread()->during_cycle() || _has_aborted) { 2571 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2572 return; 2573 } 2574 2575 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2576 // concurrent bitmap clearing. 2577 _nextMarkBitMap->clearAll(); 2578 2579 // Note we cannot clear the previous marking bitmap here 2580 // since VerifyDuringGC verifies the objects marked during 2581 // a full GC against the previous bitmap. 
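// The rest of the abort work: drop all per-worker counting data, reset the
// marking state (this empties the global mark stack), clear each task's
// region fields, and abort the two overflow barrier syncs so that workers
// parked in the termination protocol wake up and observe _has_aborted.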
2582 2583 // Clear the liveness counting data 2584 clear_all_count_data(); 2585 // Empty mark stack 2586 reset_marking_state(); 2587 for (uint i = 0; i < _max_worker_id; ++i) { 2588 _tasks[i]->clear_region_fields(); 2589 } 2590 _first_overflow_barrier_sync.abort(); 2591 _second_overflow_barrier_sync.abort(); 2592 _has_aborted = true; 2593 2594 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2595 satb_mq_set.abandon_partial_marking(); 2596 // This can be called either during or outside marking, we'll read 2597 // the expected_active value from the SATB queue set. 2598 satb_mq_set.set_active_all_threads( 2599 false, /* new active value */ 2600 satb_mq_set.is_active() /* expected_active */); 2601 2602 _g1h->trace_heap_after_concurrent_cycle(); 2603 2604 // Close any open concurrent phase timing 2605 register_concurrent_phase_end(); 2606 2607 _g1h->register_concurrent_cycle_end(); 2608 } 2609 2610 static void print_ms_time_info(const char* prefix, const char* name, 2611 NumberSeq& ns) { 2612 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2613 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2614 if (ns.num() > 0) { 2615 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2616 prefix, ns.sd(), ns.maximum()); 2617 } 2618 } 2619 2620 void ConcurrentMark::print_summary_info() { 2621 LogHandle(gc, marking) log; 2622 if (!log.is_trace()) { 2623 return; 2624 } 2625 2626 log.trace(" Concurrent marking:"); 2627 print_ms_time_info(" ", "init marks", _init_times); 2628 print_ms_time_info(" ", "remarks", _remark_times); 2629 { 2630 print_ms_time_info(" ", "final marks", _remark_mark_times); 2631 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2632 2633 } 2634 print_ms_time_info(" ", "cleanups", _cleanup_times); 2635 log.trace(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2636 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2637 if (G1ScrubRemSets) { 2638 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2639 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2640 } 2641 log.trace(" Total stop_world time = %8.2f s.", 2642 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2643 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2644 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2645 } 2646 2647 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2648 _parallel_workers->print_worker_threads_on(st); 2649 } 2650 2651 void ConcurrentMark::print_on_error(outputStream* st) const { 2652 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2653 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2654 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2655 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2656 } 2657 2658 // We take a break if someone is trying to stop the world. 
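// SuspendibleThreadSet::should_yield() becomes true once a safepoint has
// been requested; yield() then blocks the calling thread until the safepoint
// operation has completed, after which marking resumes where it left off.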
2659 bool ConcurrentMark::do_yield_check(uint worker_id) { 2660 if (SuspendibleThreadSet::should_yield()) { 2661 SuspendibleThreadSet::yield(); 2662 return true; 2663 } else { 2664 return false; 2665 } 2666 } 2667 2668 // Closure for iteration over bitmaps 2669 class CMBitMapClosure : public BitMapClosure { 2670 private: 2671 // the bitmap that is being iterated over 2672 CMBitMap* _nextMarkBitMap; 2673 ConcurrentMark* _cm; 2674 CMTask* _task; 2675 2676 public: 2677 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 2678 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2679 2680 bool do_bit(size_t offset) { 2681 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2682 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2683 assert( addr < _cm->finger(), "invariant"); 2684 assert(addr >= _task->finger(), "invariant"); 2685 2686 // We move that task's local finger along. 2687 _task->move_finger_to(addr); 2688 2689 _task->scan_object(oop(addr)); 2690 // we only partially drain the local queue and global stack 2691 _task->drain_local_queue(true); 2692 _task->drain_global_stack(true); 2693 2694 // if the has_aborted flag has been raised, we need to bail out of 2695 // the iteration 2696 return !_task->has_aborted(); 2697 } 2698 }; 2699 2700 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2701 ReferenceProcessor* result = NULL; 2702 if (G1UseConcMarkReferenceProcessing) { 2703 result = g1h->ref_processor_cm(); 2704 assert(result != NULL, "should not be NULL"); 2705 } 2706 return result; 2707 } 2708 2709 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2710 ConcurrentMark* cm, 2711 CMTask* task) 2712 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2713 _g1h(g1h), _cm(cm), _task(task) 2714 { } 2715 2716 void CMTask::setup_for_region(HeapRegion* hr) { 2717 assert(hr != NULL, 2718 "claim_region() should have filtered out NULL regions"); 2719 _curr_region = hr; 2720 _finger = hr->bottom(); 2721 update_region_limit(); 2722 } 2723 2724 void CMTask::update_region_limit() { 2725 HeapRegion* hr = _curr_region; 2726 HeapWord* bottom = hr->bottom(); 2727 HeapWord* limit = hr->next_top_at_mark_start(); 2728 2729 if (limit == bottom) { 2730 // The region was collected underneath our feet. 2731 // We set the finger to bottom to ensure that the bitmap 2732 // iteration that will follow this will not do anything. 2733 // (this is not a condition that holds when we set the region up, 2734 // as the region is not supposed to be empty in the first place) 2735 _finger = bottom; 2736 } else if (limit >= _region_limit) { 2737 assert(limit >= _finger, "peace of mind"); 2738 } else { 2739 assert(limit < _region_limit, "only way to get here"); 2740 // This can happen under some pretty unusual circumstances. An 2741 // evacuation pause empties the region underneath our feet (NTAMS 2742 // at bottom). We then do some allocation in the region (NTAMS 2743 // stays at bottom), followed by the region being used as a GC 2744 // alloc region (NTAMS will move to top() and the objects 2745 // originally below it will be grayed). All objects now marked in 2746 // the region are explicitly grayed, if below the global finger, 2747 // and we do not need in fact to scan anything else. So, we simply 2748 // set _finger to be limit to ensure that the bitmap iteration 2749 // doesn't do anything. 
2750 _finger = limit;
2751 }
2752
2753 _region_limit = limit;
2754 }
2755
2756 void CMTask::giveup_current_region() {
2757 assert(_curr_region != NULL, "invariant");
2758 clear_region_fields();
2759 }
2760
2761 void CMTask::clear_region_fields() {
2762 // Values for these three fields that indicate that we're not
2763 // holding on to a region.
2764 _curr_region = NULL;
2765 _finger = NULL;
2766 _region_limit = NULL;
2767 }
2768
2769 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2770 if (cm_oop_closure == NULL) {
2771 assert(_cm_oop_closure != NULL, "invariant");
2772 } else {
2773 assert(_cm_oop_closure == NULL, "invariant");
2774 }
2775 _cm_oop_closure = cm_oop_closure;
2776 }
2777
2778 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2779 guarantee(nextMarkBitMap != NULL, "invariant");
2780 _nextMarkBitMap = nextMarkBitMap;
2781 clear_region_fields();
2782
2783 _calls = 0;
2784 _elapsed_time_ms = 0.0;
2785 _termination_time_ms = 0.0;
2786 _termination_start_time_ms = 0.0;
2787 }
2788
2789 bool CMTask::should_exit_termination() {
2790 regular_clock_call();
2791 // This is called when we are in the termination protocol. We should
2792 // quit if, for some reason, this task wants to abort or the global
2793 // stack is not empty (this means that we can get work from it).
2794 return !_cm->mark_stack_empty() || has_aborted();
2795 }
2796
2797 void CMTask::reached_limit() {
2798 assert(_words_scanned >= _words_scanned_limit ||
2799 _refs_reached >= _refs_reached_limit,
2800 "shouldn't have been called otherwise");
2801 regular_clock_call();
2802 }
2803
2804 void CMTask::regular_clock_call() {
2805 if (has_aborted()) return;
2806
2807 // First, we need to recalculate the words scanned and refs reached
2808 // limits for the next clock call.
2809 recalculate_limits();
2810
2811 // During the regular clock call we do the following:
2812
2813 // (1) If an overflow has been flagged, then we abort.
2814 if (_cm->has_overflown()) {
2815 set_has_aborted();
2816 return;
2817 }
2818
2819 // If we are not concurrent (i.e. we're doing remark) we don't need
2820 // to check anything else. The other steps are only needed during
2821 // the concurrent marking phase.
2822 if (!concurrent()) return;
2823
2824 // (2) If marking has been aborted for Full GC, then we also abort.
2825 if (_cm->has_aborted()) {
2826 set_has_aborted();
2827 return;
2828 }
2829
2830 double curr_time_ms = os::elapsedVTime() * 1000.0;
2831
2832 // (3) We check whether we should yield. If we have to, then we abort.
2833 if (SuspendibleThreadSet::should_yield()) {
2834 // We should yield. To do this we abort the task. The caller is
2835 // responsible for yielding.
2836 set_has_aborted();
2837 return;
2838 }
2839
2840 // (4) We check whether we've reached our time quota. If we have,
2841 // then we abort.
2842 double elapsed_time_ms = curr_time_ms - _start_time_ms;
2843 if (elapsed_time_ms > _time_target_ms) {
2844 set_has_aborted();
2845 _has_timed_out = true;
2846 return;
2847 }
2848
2849 // (5) Finally, we check whether there are enough completed SATB
2850 // buffers available for processing. If there are, we abort.
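// Note that the aborted step does not drain the buffers itself;
// drain_satb_buffers() does that at the start of the next
// do_marking_step() invocation.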
2851 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2852 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2853 // We need to process SATB buffers, so we'll abort and restart
2854 // the marking task to do so.
2855 set_has_aborted();
2856 return;
2857 }
2858 }
2859
2860 void CMTask::recalculate_limits() {
2861 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2862 _words_scanned_limit = _real_words_scanned_limit;
2863
2864 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2865 _refs_reached_limit = _real_refs_reached_limit;
2866 }
2867
2868 void CMTask::decrease_limits() {
2869 // This is called when we believe that we're going to do an infrequent
2870 // operation which will increase the per-byte scanned cost (i.e. move
2871 // entries to/from the global stack). It basically tries to decrease the
2872 // scanning limit so that the clock is called earlier.
2873
2874 _words_scanned_limit = _real_words_scanned_limit -
2875 3 * words_scanned_period / 4;
2876 _refs_reached_limit = _real_refs_reached_limit -
2877 3 * refs_reached_period / 4;
2878 }
2879
2880 void CMTask::move_entries_to_global_stack() {
2881 // local array where we'll store the entries that will be popped
2882 // from the local queue
2883 oop buffer[global_stack_transfer_size];
2884
2885 int n = 0;
2886 oop obj;
2887 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2888 buffer[n] = obj;
2889 ++n;
2890 }
2891
2892 if (n > 0) {
2893 // we popped at least one entry from the local queue
2894
2895 if (!_cm->mark_stack_push(buffer, n)) {
2896 set_has_aborted();
2897 }
2898 }
2899
2900 // this operation was quite expensive, so decrease the limits
2901 decrease_limits();
2902 }
2903
2904 void CMTask::get_entries_from_global_stack() {
2905 // local array where we'll store the entries that will be popped
2906 // from the global stack.
2907 oop buffer[global_stack_transfer_size];
2908 int n;
2909 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2910 assert(n <= global_stack_transfer_size,
2911 "we should not pop more than the given limit");
2912 if (n > 0) {
2913 // yes, we did actually pop at least one entry
2914 for (int i = 0; i < n; ++i) {
2915 bool success = _task_queue->push(buffer[i]);
2916 // We only call this when the local queue is empty or under a
2917 // given target limit. So, we do not expect this push to fail.
2918 assert(success, "invariant");
2919 }
2920 }
2921
2922 // this operation was quite expensive, so decrease the limits
2923 decrease_limits();
2924 }
2925
2926 void CMTask::drain_local_queue(bool partially) {
2927 if (has_aborted()) return;
2928
2929 // Decide what the target size is, depending on whether we're going to
2930 // drain it partially (so that other tasks can steal if they run out
2931 // of things to do) or totally (at the very end).
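// For a partial drain the target is capped both by GCDrainStackTargetSize
// and by a third of the queue's capacity, so that other tasks can still
// find entries to steal; a total drain runs the queue down to empty.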
2932 size_t target_size;
2933 if (partially) {
2934 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2935 } else {
2936 target_size = 0;
2937 }
2938
2939 if (_task_queue->size() > target_size) {
2940 oop obj;
2941 bool ret = _task_queue->pop_local(obj);
2942 while (ret) {
2943 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
2944 assert(!_g1h->is_on_master_free_list(
2945 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2946
2947 scan_object(obj);
2948
2949 if (_task_queue->size() <= target_size || has_aborted()) {
2950 ret = false;
2951 } else {
2952 ret = _task_queue->pop_local(obj);
2953 }
2954 }
2955 }
2956 }
2957
2958 void CMTask::drain_global_stack(bool partially) {
2959 if (has_aborted()) return;
2960
2961 // We have a policy to drain the local queue before we attempt to
2962 // drain the global stack.
2963 assert(partially || _task_queue->size() == 0, "invariant");
2964
2965 // Decide what the target size is, depending on whether we're going to
2966 // drain it partially (so that other tasks can steal if they run out
2967 // of things to do) or totally (at the very end). Notice that,
2968 // because we move entries from the global stack in chunks or
2969 // because another task might be doing the same, we might in fact
2970 // drop below the target. But, this is not a problem.
2971 size_t target_size;
2972 if (partially) {
2973 target_size = _cm->partial_mark_stack_size_target();
2974 } else {
2975 target_size = 0;
2976 }
2977
2978 if (_cm->mark_stack_size() > target_size) {
2979 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2980 get_entries_from_global_stack();
2981 drain_local_queue(partially);
2982 }
2983 }
2984 }
2985
2986 // The SATB queue code has several assumptions on whether to call the par or
2987 // non-par versions of the methods. This is why some of the code is
2988 // replicated. We should really get rid of the single-threaded version
2989 // of the code to simplify things.
2990 void CMTask::drain_satb_buffers() {
2991 if (has_aborted()) return;
2992
2993 // We set this so that the regular clock knows that we're in the
2994 // middle of draining buffers and doesn't set the abort flag when it
2995 // notices that SATB buffers are available for draining. It'd be
2996 // very counterproductive if it did that. :-)
2997 _draining_satb_buffers = true;
2998
2999 CMSATBBufferClosure satb_cl(this, _g1h);
3000 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3001
3002 // This keeps claiming and applying the closure to completed buffers
3003 // until we run out of buffers or we need to abort.
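// apply_closure_to_completed_buffer() claims a single completed buffer per
// call, so several marking tasks can drain the queue in parallel; invoking
// regular_clock_call() between buffers keeps the time target honest.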
3004 while (!has_aborted() &&
3005 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3006 regular_clock_call();
3007 }
3008
3009 _draining_satb_buffers = false;
3010
3011 assert(has_aborted() ||
3012 concurrent() ||
3013 satb_mq_set.completed_buffers_num() == 0, "invariant");
3014
3015 // Again, this was a potentially expensive operation, so decrease the
3016 // limits to get the regular clock call early.
3017 decrease_limits();
3018 }
3019
3020 void CMTask::print_stats() {
3021 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
3022 _worker_id, _calls);
3023 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3024 _elapsed_time_ms, _termination_time_ms);
3025 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3026 _step_times_ms.num(), _step_times_ms.avg(),
3027 _step_times_ms.sd());
3028 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms",
3029 _step_times_ms.maximum(), _step_times_ms.sum());
3030 }
3031
3032 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3033 return _task_queues->steal(worker_id, hash_seed, obj);
3034 }
3035
3036 /*****************************************************************************
3037
3038 The do_marking_step(time_target_ms, ...) method is the building
3039 block of the parallel marking framework. It can be called in parallel
3040 with other invocations of do_marking_step() on different tasks
3041 (but only one per task, obviously) and concurrently with the
3042 mutator threads, or during remark; hence it eliminates the need
3043 for two versions of the code. When called during remark, it will
3044 pick up from where the task left off during the concurrent marking
3045 phase. Interestingly, tasks are also claimable during evacuation
3046 pauses, since do_marking_step() ensures that it aborts before
3047 it needs to yield.
3048
3049 The data structures that it uses to do marking work are the
3050 following:
3051
3052 (1) Marking Bitmap. If there are gray objects that appear only
3053 on the bitmap (this happens either when dealing with an overflow
3054 or when the initial marking phase has simply marked the roots
3055 and didn't push them on the stack), then tasks claim heap
3056 regions whose bitmap they then scan to find gray objects. A
3057 global finger indicates where the end of the last claimed region
3058 is. A local finger indicates how far into the region a task has
3059 scanned. The two fingers are used to determine how to gray an
3060 object (i.e. whether simply marking it is OK, as it will be
3061 visited by a task in the future, or whether it also needs to be
3062 pushed on a stack).
3063
3064 (2) Local Queue. The local queue of the task which is accessed
3065 reasonably efficiently by the task. Other tasks can steal from
3066 it when they run out of work. Throughout the marking phase, a
3067 task attempts to keep its local queue short but not totally
3068 empty, so that entries are available for stealing by other
3069 tasks. Only when there is no more work does a task totally
3070 drain its local queue.
3071
3072 (3) Global Mark Stack. This handles local queue overflow. During
3073 marking only sets of entries are moved between it and the local
3074 queues, as access to it requires a mutex and more fine-grained
3075 interaction with it might cause contention. If it
3076 overflows, then the marking phase should restart and iterate
3077 over the bitmap to identify gray objects.
Throughout the marking
3078 phase, tasks attempt to keep the global mark stack at a small
3079 length but not totally empty, so that entries are available for
3080 popping by other tasks. Only when there is no more work do tasks
3081 totally drain the global mark stack.
3082
3083 (4) SATB Buffer Queue. This is where completed SATB buffers are
3084 made available. Buffers are regularly removed from this queue
3085 and scanned for roots, so that the queue doesn't get too
3086 long. During remark, all completed buffers are processed, as
3087 well as the filled-in parts of any uncompleted buffers.
3088
3089 The do_marking_step() method tries to abort when the time target
3090 has been reached. There are a few other cases when the
3091 do_marking_step() method also aborts:
3092
3093 (1) When the marking phase has been aborted (after a Full GC).
3094
3095 (2) When a global overflow (on the global stack) has been
3096 triggered. Before the task aborts, it will actually sync up with
3097 the other tasks to ensure that all the marking data structures
3098 (local queues, stacks, fingers etc.) are re-initialized so that
3099 when do_marking_step() completes, the marking phase can
3100 immediately restart.
3101
3102 (3) When enough completed SATB buffers are available. The
3103 do_marking_step() method only tries to drain SATB buffers right
3104 at the beginning. So, if enough buffers are available, the
3105 marking step aborts and the SATB buffers are processed at
3106 the beginning of the next invocation.
3107
3108 (4) To yield. When we have to yield, we abort and do the yield
3109 right at the end of do_marking_step(). This saves us from a lot
3110 of hassle as, by yielding, we might allow a Full GC. If this
3111 happens then objects will be compacted underneath our feet, the
3112 heap might shrink, etc. We save checking for this by just
3113 aborting and doing the yield right at the end.
3114
3115 From the above it follows that the do_marking_step() method should
3116 be called in a loop (or, otherwise, regularly) until it completes.
3117
3118 If a marking step completes without its has_aborted() flag being
3119 true, it means it has completed the current marking phase (and
3120 also all other marking tasks have done so and have all synced up).
3121
3122 A method called regular_clock_call() is invoked "regularly" (in
3123 sub-millisecond intervals) throughout marking. It is this clock method that
3124 checks all the abort conditions which were mentioned above and
3125 decides when the task should abort. A work-based scheme is used to
3126 trigger this clock method: when the number of object words the
3127 marking phase has scanned or the number of references the marking
3128 phase has visited reach a given limit. Additional invocations of
3129 the clock method have been planted in a few other strategic places
3130 too. The initial reason for the clock method was to avoid calling
3131 vtime too regularly, as it is quite expensive. So, once it was in
3132 place, it was natural to piggy-back all the other conditions on it
3133 too and not constantly check them throughout the code.
3134
3135 If do_termination is true then do_marking_step will enter its
3136 termination protocol.
3137
3138 The value of is_serial must be true when do_marking_step is being
3139 called serially (i.e. by the VMThread) and do_marking_step should
3140 skip any synchronization in the termination and overflow code.
3141 Examples include the serial remark code and the serial reference
3142 processing closures.

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);
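
  // Illustrative note (an assumption about the mechanism, not original
  // code): the work-based scheme set up above means that the scanning
  // and reference-visiting code bumps _words_scanned / _refs_reached
  // and, once either counter crosses the limit recomputed by
  // recalculate_limits(), the next check ends up calling
  // regular_clock_call(), roughly (limit field names assumed here):
  //
  //   if (_words_scanned >= _words_scanned_limit ||
  //       _refs_reached  >= _refs_reached_limit) {
  //     regular_clock_call();
  //   }
  //
  // which is what keeps the abort checks running at sub-millisecond
  // granularity during the loop below.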

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.
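
    // Illustrative note on the aborted case above (an assumption based
    // on the bitmap setup at the top of this file, not original code):
    // one bitmap bit covers HeapWordSize << _shifter bytes, so
    // nextObject(_finger) simply advances the finger to the next such
    // granule boundary - the next address at which an object header
    // could possibly start. E.g. with 8-byte heap words and
    // _shifter == 0, an aborted scan at 0x1008 would resume at 0x1010
    // rather than rescan the object at 0x1008.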

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether or not to exit the termination protocol.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // Let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task. Its
      // caller will restart it and we can hopefully find more things
      // to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }
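
      // Illustrative note (a simplified restatement of the protocol
      // described above, not original code); with N parallel tasks:
      //
      //   enter_first_sync_barrier()   all N tasks have stopped marking,
      //                                so state can be reset safely
      //   clear_region_fields() etc.   each task resets its local state
      //   enter_second_sync_barrier()  all N tasks have finished
      //                                resetting, so marking can restart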

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT   " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT   " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT   " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
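
// Illustrative note (expansion shown for clarity, not original code):
// these macros compose by C string-literal concatenation. For example,
//
//   G1PPRL_SUM_MB_PERC_FORMAT("used")
//
// expands to
//
//   " used: %1.2f MB / %1.2f %%"
//
// so a single log_trace() call can format an entire summary line from
// these building blocks.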

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

// Takes a pointer to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field is
// < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series from
// the _hum_* fields and updates those accordingly. It assumes that
// the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}
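
// Illustrative example (numbers invented for clarity, not original
// code): suppose a humongous object spans 2.5 regions with 1 MB
// regions. doHeapRegion() below seeds _hum_used_bytes with the full
// 2.5 MB at the "starts humongous" region; the three regions of the
// series then report via get_hum_bytes():
//
//   region 1: MIN2(1 MB, 2.5 MB) = 1.0 MB, leaving 1.5 MB
//   region 2: MIN2(1 MB, 1.5 MB) = 1.0 MB, leaving 0.5 MB
//   region 3: MIN2(1 MB, 0.5 MB) = 0.5 MB, leaving 0
//
// so the per-region lines sum back to the object's true footprint.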

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = r->get_type_str();
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  size_t remset_bytes = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes = capacity_bytes;
    _hum_used_bytes = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes += used_bytes;
  _total_capacity_bytes += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usage to the remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          perc(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          perc(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          perc(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}
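
// Illustrative note (values and widths invented, not captured output):
// with the formats above, the liveness log looks roughly like:
//
//   ### PHASE <phase-name> @ 12.345
//   ### HEAP reserved: 0x...-0x... region-size: 1048576
//   ### type address-range used prev-live next-live gc-eff remset code-roots
//   ###  OLD 0x...-0x...  1048576  1042192  1042192  1234.5  5678  0
//   ### SUMMARY capacity: 256.00 MB used: 128.00 MB / 50.00 % ...
//
// i.e. a prefixed header block, one line per region, and a footer with
// heap-wide totals.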