/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
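  // Each mark bit covers (1 << _shifter) heap words, so only addresses
  // at that granularity can have a corresponding bit in the map.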
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  assert(limit != NULL, "limit must not be NULL");
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
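  // The stack is empty at this point (asserted on entry), so no
  // contents need to be copied; we only need a larger backing store.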
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    // Failed to double capacity, continue;
    log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                  _capacity / K, new_capacity / K);
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
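  // Fast check without the lock; if a candidate exists we re-read it
  // under RootRegionScan_lock below before actually claiming it.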
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _concurrent_phase_started(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
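  // This thread drives the concurrent phases of the marking cycle; it is
  // created here but idles until a cycle is actually started.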
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
       _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
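    // Only a MarkStackSize that was set on the command line is validated
    // here; it must lie between 1 and MarkStackSizeMax whether the latter
    // is the default or was also set explicitly.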
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that it gets suspended while a Full GC or an evacuation pause
 * occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);

      log_info(gc)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::register_concurrent_phase_start(const char* title) {
  assert(!_concurrent_phase_started, "Sanity");
  _concurrent_phase_started = true;
  _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
}

void ConcurrentMark::register_concurrent_phase_end() {
  if (_concurrent_phase_started) {
    _concurrent_phase_started = false;
    _g1h->gc_timer_cm()->register_gc_concurrent_end();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
    "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);

  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    log_develop_trace(gc)("Remark led to restart for overflow.");

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
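      // Each worker task accumulated its own card bitmap and per-region
      // marked-bytes array; aggregation folds these per-worker structures
      // into the global counting data.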
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
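    // Objects at or above NTAMS are implicitly live and are accounted
    // for separately after this loop, so NTAMS bounds the bitmap walk.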
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // This will happen if we are handling a humongous object that spans
      // several heap regions.
      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.
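// A failure is recorded whenever a bit set in the expected (recomputed)
// bitmaps is clear in the corresponding actual (aggregated) bitmap, or
// when the expected marked bytes exceed the actual marked bytes.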

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    if (exp_marked_bytes > act_marked_bytes) {
      if (hr->is_starts_humongous()) {
        // For start_humongous regions, the size of the whole object will be
        // in exp_marked_bytes.
        HeapRegion* region = hr;
        int num_regions;
        for (num_regions = 0; region != NULL; num_regions++) {
          region = _g1h->next_region_in_humongous(region);
        }
        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
          failures += 1;
        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
          failures += 1;
        }
      } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting some objects during the actual marking.
        failures += 1;
      }
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
 public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             end_idx, _card_bm->size());
      assert(start_idx < _card_bm->size(),
             "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             start_idx, _card_bm->size());

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;
  HeapRegionClaimer _hrclaimer;

 public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
  }
};

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

 public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
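    // A non-young region with used() > 0 but no live bytes after marking
    // is completely garbage and is freed right here; every other region
    // keeps its data and has its remembered set queued for cleanup below.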
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

 protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

 public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
 protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
  HeapRegionClaimer _hrclaimer;

 public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
      AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
  }

};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
1624 if (has_aborted()) {
1625 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1626 return;
1627 }
1628
1629 g1h->verify_region_sets_optional();
1630
1631 if (VerifyDuringGC) {
1632 HandleMark hm; // handle scope
1633 g1h->prepare_for_verify();
1634 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1635 }
1636 g1h->check_bitmaps("Cleanup Start");
1637
1638 G1CollectorPolicy* g1p = g1h->g1_policy();
1639 g1p->record_concurrent_mark_cleanup_start();
1640
1641 double start = os::elapsedTime();
1642
1643 HeapRegionRemSet::reset_for_cleanup_tasks();
1644
1645 // Do counting once more with the world stopped for good measure.
1646 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1647
1648 g1h->workers()->run_task(&g1_par_count_task);
1649
1650 if (VerifyDuringGC) {
1651 // Verify that the counting data accumulated during marking matches
1652 // that calculated by walking the marking bitmap.
1653
1654 // Bitmaps to hold expected values
1655 BitMap expected_region_bm(_region_bm.size(), true);
1656 BitMap expected_card_bm(_card_bm.size(), true);
1657
1658 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1659 &_region_bm,
1660 &_card_bm,
1661 &expected_region_bm,
1662 &expected_card_bm);
1663
1664 g1h->workers()->run_task(&g1_par_verify_task);
1665
1666 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1667 }
1668
1669 size_t start_used_bytes = g1h->used();
1670 g1h->collector_state()->set_mark_in_progress(false);
1671
1672 double count_end = os::elapsedTime();
1673 double this_final_counting_time = (count_end - start);
1674 _total_counting_time += this_final_counting_time;
1675
1676 if (log_is_enabled(Trace, gc, liveness)) {
1677 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1678 _g1h->heap_region_iterate(&cl);
1679 }
1680
1681 // Install newly created mark bitmap as "prev".
1682 swapMarkBitMaps();
1683
1684 g1h->reset_gc_time_stamp();
1685
1686 uint n_workers = _g1h->workers()->active_workers();
1687
1688 // Note end of marking in all heap regions.
1689 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1690 g1h->workers()->run_task(&g1_par_note_end_task);
1691 g1h->check_gc_time_stamps();
1692
1693 if (!cleanup_list_is_empty()) {
1694 // The cleanup list is not empty, so we'll have to process it
1695 // concurrently. Notify anyone else that might be wanting free
1696 // regions that there will be more free regions coming soon.
1697 g1h->set_free_regions_coming();
1698 }
1699
1700 // Scrub the remembered sets *before* the record_concurrent_mark_cleanup_end()
1701 // call below, since it affects the metric by which we sort the heap regions.
1702 if (G1ScrubRemSets) {
1703 double rs_scrub_start = os::elapsedTime();
1704 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1705 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1706
1707 double rs_scrub_end = os::elapsedTime();
1708 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1709 _total_rs_scrub_time += this_rs_scrub_time;
1710 }
1711
1712 // this will also free any regions totally full of garbage objects,
1713 // and sort the regions.
1714 g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1715
1716 // Statistics.
1717 double end = os::elapsedTime();
1718 _cleanup_times.add((end - start) * 1000.0);
1719
1720 // Clean up will have freed any regions completely full of garbage.
1721 // Update the soft reference policy with the new heap occupancy.
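// (update_heap_info_at_gc() caches the heap capacity and usage figures
// that the soft reference clearing policy reads when deciding how
// aggressively to clear soft references.)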
1722 Universe::update_heap_info_at_gc();
1723
1724 if (VerifyDuringGC) {
1725 HandleMark hm; // handle scope
1726 g1h->prepare_for_verify();
1727 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1728 }
1729
1730 g1h->check_bitmaps("Cleanup End");
1731
1732 g1h->verify_region_sets_optional();
1733
1734 // We need to make this be a "collection" so any collection pause that
1735 // races with it goes around and waits for completeCleanup to finish.
1736 g1h->increment_total_collections();
1737
1738 // Clean out dead classes and update Metaspace sizes.
1739 if (ClassUnloadingWithConcurrentMark) {
1740 ClassLoaderDataGraph::purge();
1741 }
1742 MetaspaceGC::compute_new_size();
1743
1744 // We reclaimed old regions so we should calculate the sizes to make
1745 // sure we update the old gen/space data.
1746 g1h->g1mm()->update_sizes();
1747 g1h->allocation_context_stats().update_after_mark();
1748
1749 g1h->trace_heap_after_concurrent_cycle();
1750 }
1751
1752 void ConcurrentMark::completeCleanup() {
1753 if (has_aborted()) return;
1754
1755 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1756
1757 _cleanup_list.verify_optional();
1758 FreeRegionList tmp_free_list("Tmp Free List");
1759
1760 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1761 "cleanup list has %u entries",
1762 _cleanup_list.length());
1763
1764 // No one else should be accessing the _cleanup_list at this point,
1765 // so it is not necessary to take any locks
1766 while (!_cleanup_list.is_empty()) {
1767 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1768 assert(hr != NULL, "Got NULL from a non-empty list");
1769 hr->par_clear();
1770 tmp_free_list.add_ordered(hr);
1771
1772 // Instead of adding one region at a time to the secondary_free_list,
1773 // we accumulate them in the local list and move them a few at a
1774 // time. This also cuts down on the number of notify_all() calls
1775 // we do during this process. We'll also append the local list when
1776 // _cleanup_list is empty (which means we just removed the last
1777 // region from the _cleanup_list).
1778 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1779 _cleanup_list.is_empty()) {
1780 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1781 "appending %u entries to the secondary_free_list, "
1782 "cleanup list still has %u entries",
1783 tmp_free_list.length(),
1784 _cleanup_list.length());
1785
1786 {
1787 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1788 g1h->secondary_free_list_add(&tmp_free_list);
1789 SecondaryFreeList_lock->notify_all();
1790 }
1791 #ifndef PRODUCT
1792 if (G1StressConcRegionFreeing) {
1793 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1794 os::sleep(Thread::current(), (jlong) 1, false);
1795 }
1796 }
1797 #endif
1798 }
1799 }
1800 assert(tmp_free_list.is_empty(), "post-condition");
1801 }
1802
1803 // Supporting Object and Oop closures for reference discovery
1804 // and processing during marking
1805
1806 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1807 HeapWord* addr = (HeapWord*)obj;
1808 return addr != NULL &&
1809 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1810 }
1811
1812 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1813 // Uses the CMTask associated with a worker thread (for serial reference
1814 // processing the CMTask for worker 0 is used) to preserve (mark) and
1815 // trace referent objects.
1816 //
1817 // Using the CMTask and embedded local queues avoids having the worker
1818 // threads operating on the global mark stack. This reduces the risk
1819 // of overflowing the stack - which we would rather avoid at this late
1820 // stage. Also, using the tasks' local queues removes the potential
1821 // for workers to interfere with each other, which could occur if
1822 // they operated on the global stack.
1823
1824 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1825 ConcurrentMark* _cm;
1826 CMTask* _task;
1827 int _ref_counter_limit;
1828 int _ref_counter;
1829 bool _is_serial;
1830 public:
1831 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1832 _cm(cm), _task(task),
1833 _ref_counter_limit(G1RefProcDrainInterval), _is_serial(is_serial) {
1834 assert(_ref_counter_limit > 0, "sanity");
1835 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1836 _ref_counter = _ref_counter_limit;
1837 }
1838
1839 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1840 virtual void do_oop( oop* p) { do_oop_work(p); }
1841
1842 template <class T> void do_oop_work(T* p) {
1843 if (!_cm->has_overflown()) {
1844 oop obj = oopDesc::load_decode_heap_oop(p);
1845 _task->deal_with_reference(obj);
1846 _ref_counter--;
1847
1848 if (_ref_counter == 0) {
1849 // We have dealt with _ref_counter_limit references, pushing them
1850 // and objects reachable from them on to the local stack (and
1851 // possibly the global stack). Call CMTask::do_marking_step() to
1852 // process these entries.
1853 //
1854 // We call CMTask::do_marking_step() in a loop, which we'll exit if
1855 // there's nothing more to do (i.e. we're done with the entries that
1856 // were pushed as a result of the CMTask::deal_with_reference() calls
1857 // above) or we overflow.
1858 //
1859 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1860 // flag while there may still be some work to do. (See the comment at
1861 // the beginning of CMTask::do_marking_step() for those conditions -
1862 // one of which is reaching the specified time target.) It is only
1863 // when CMTask::do_marking_step() returns without setting the
1864 // has_aborted() flag that the marking step has completed.
1865 do {
1866 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1867 _task->do_marking_step(mark_step_duration_ms,
1868 false /* do_termination */,
1869 _is_serial);
1870 } while (_task->has_aborted() && !_cm->has_overflown());
1871 _ref_counter = _ref_counter_limit;
1872 }
1873 }
1874 }
1875 };
1876
1877 // 'Drain' oop closure used by both serial and parallel reference processing.
1878 // Uses the CMTask associated with a given worker thread (for serial
1879 // reference processing the CMTask for worker 0 is used). Calls the
1880 // do_marking_step routine, with an unbelievably large timeout value,
1881 // to drain the marking data structures of the remaining entries
1882 // added by the 'keep alive' oop closure above.
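//
// Roughly, reference processing drives the two closures like this
// (an illustrative sketch only, not the actual ReferenceProcessor code):
//
//   for each discovered reference r:
//     if (!is_alive->do_object_b(referent of r)) {
//       keep_alive->do_oop(referent field of r);  // G1CMKeepAliveAndDrainClosure
//     }
//   complete_gc->do_void();                       // G1CMDrainMarkingStackClosure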
1883
1884 class G1CMDrainMarkingStackClosure: public VoidClosure {
1885 ConcurrentMark* _cm;
1886 CMTask* _task;
1887 bool _is_serial;
1888 public:
1889 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1890 _cm(cm), _task(task), _is_serial(is_serial) {
1891 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1892 }
1893
1894 void do_void() {
1895 do {
1896 // We call CMTask::do_marking_step() to completely drain the local
1897 // and global marking stacks of entries pushed by the 'keep alive'
1898 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1899 //
1900 // CMTask::do_marking_step() is called in a loop, which we'll exit
1901 // if there's nothing more to do (i.e. we've completely drained the
1902 // entries that were pushed as a result of applying the 'keep alive'
1903 // closure to the entries on the discovered ref lists) or we overflow
1904 // the global marking stack.
1905 //
1906 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1907 // flag while there may still be some work to do. (See the comment at
1908 // the beginning of CMTask::do_marking_step() for those conditions -
1909 // one of which is reaching the specified time target.) It is only
1910 // when CMTask::do_marking_step() returns without setting the
1911 // has_aborted() flag that the marking step has completed.
1912
1913 _task->do_marking_step(1000000000.0 /* something very large */,
1914 true /* do_termination */,
1915 _is_serial);
1916 } while (_task->has_aborted() && !_cm->has_overflown());
1917 }
1918 };
1919
1920 // Implementation of AbstractRefProcTaskExecutor for parallel
1921 // reference processing at the end of G1 concurrent marking
1922
1923 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1924 private:
1925 G1CollectedHeap* _g1h;
1926 ConcurrentMark* _cm;
1927 WorkGang* _workers;
1928 uint _active_workers;
1929
1930 public:
1931 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1932 ConcurrentMark* cm,
1933 WorkGang* workers,
1934 uint n_workers) :
1935 _g1h(g1h), _cm(cm),
1936 _workers(workers), _active_workers(n_workers) { }
1937
1938 // Executes the given task using concurrent marking worker threads.
1939 virtual void execute(ProcessTask& task);
1940 virtual void execute(EnqueueTask& task);
1941 };
1942
1943 class G1CMRefProcTaskProxy: public AbstractGangTask {
1944 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1945 ProcessTask& _proc_task;
1946 G1CollectedHeap* _g1h;
1947 ConcurrentMark* _cm;
1948
1949 public:
1950 G1CMRefProcTaskProxy(ProcessTask& proc_task,
1951 G1CollectedHeap* g1h,
1952 ConcurrentMark* cm) :
1953 AbstractGangTask("Process reference objects in parallel"),
1954 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1955 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1956 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1957 }
1958
1959 virtual void work(uint worker_id) {
1960 ResourceMark rm;
1961 HandleMark hm;
1962 CMTask* task = _cm->task(worker_id);
1963 G1CMIsAliveClosure g1_is_alive(_g1h);
1964 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1965 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1966
1967 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1968 }
1969 };
1970
1971 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1972 assert(_workers != NULL, "Need parallel worker threads.");
1973 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1974
1975 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1976
1977 // We need to reset the concurrency level before each
1978 // proxy task execution, so that the termination protocol
1979 // and overflow handling in CMTask::do_marking_step() know
1980 // how many workers to wait for.
1981 _cm->set_concurrency(_active_workers);
1982 _workers->run_task(&proc_task_proxy);
1983 }
1984
1985 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1986 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1987 EnqueueTask& _enq_task;
1988
1989 public:
1990 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1991 AbstractGangTask("Enqueue reference objects in parallel"),
1992 _enq_task(enq_task) { }
1993
1994 virtual void work(uint worker_id) {
1995 _enq_task.work(worker_id);
1996 }
1997 };
1998
1999 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2000 assert(_workers != NULL, "Need parallel worker threads.");
2001 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2002
2003 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2004
2005 // Not strictly necessary but...
2006 //
2007 // We need to reset the concurrency level before each
2008 // proxy task execution, so that the termination protocol
2009 // and overflow handling in CMTask::do_marking_step() know
2010 // how many workers to wait for.
2011 _cm->set_concurrency(_active_workers);
2012 _workers->run_task(&enq_task_proxy);
2013 }
2014
2015 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2016 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2017 }
2018
2019 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2020 if (has_overflown()) {
2021 // Skip processing the discovered references if we have
2022 // overflown the global marking stack. Reference objects
2023 // only get discovered once, so it is OK to not
2024 // de-populate the discovered reference lists. We could have,
2025 // but the only benefit would be that, when marking restarts,
2026 // fewer reference objects are discovered.
2027 return;
2028 }
2029
2030 ResourceMark rm;
2031 HandleMark hm;
2032
2033 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2034
2035 // Is alive closure.
2036 G1CMIsAliveClosure g1_is_alive(g1h);
2037
2038 // Inner scope to exclude the cleaning of the string and symbol
2039 // tables from the displayed time.
2040 {
2041 GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
2042
2043 ReferenceProcessor* rp = g1h->ref_processor_cm();
2044
2045 // See the comment in G1CollectedHeap::ref_processing_init()
2046 // about how reference processing currently works in G1.
2047
2048 // Set the soft reference policy
2049 rp->setup_policy(clear_all_soft_refs);
2050 assert(_markStack.isEmpty(), "mark stack should be empty");
2051
2052 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2053 // in serial reference processing. Note these closures are also
2054 // used for serially processing (by the current thread) the
2055 // JNI references during parallel reference processing.
2056 //
2057 // These closures do not need to synchronize with the worker
2058 // threads involved in parallel reference processing as these
2059 // instances are executed serially by the current thread (e.g.
2060 // reference processing is not multi-threaded and is thus
2061 // performed by the current thread instead of a gang worker).
2062 //
2063 // The gang tasks involved in parallel reference processing create
2064 // their own instances of these closures, which do their own
2065 // synchronization among themselves.
2066 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2067 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2068
2069 // We need at least one active thread. If reference processing
2070 // is not multi-threaded we use the current (VMThread) thread,
2071 // otherwise we use the work gang from the G1CollectedHeap and
2072 // we utilize all the worker threads we can.
2073 bool processing_is_mt = rp->processing_is_mt();
2074 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2075 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2076
2077 // Parallel processing task executor.
2078 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2079 g1h->workers(), active_workers);
2080 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2081
2082 // Set the concurrency level. The phase was already set prior to
2083 // executing the remark task.
2084 set_concurrency(active_workers);
2085
2086 // Set the degree of MT processing here. If the discovery was done MT,
2087 // the number of threads involved during discovery could differ from
2088 // the number of active workers. This is OK as long as the discovered
2089 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2090 rp->set_active_mt_degree(active_workers);
2091
2092 // Process the weak references.
2093 const ReferenceProcessorStats& stats =
2094 rp->process_discovered_references(&g1_is_alive,
2095 &g1_keep_alive,
2096 &g1_drain_mark_stack,
2097 executor,
2098 g1h->gc_timer_cm());
2099 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2100
2101 // The do_oop work routines of the keep_alive and drain_marking_stack
2102 // oop closures will set the has_overflown flag if we overflow the
2103 // global marking stack.
2104 2105 assert(_markStack.overflow() || _markStack.isEmpty(), 2106 "mark stack should be empty (unless it overflowed)"); 2107 2108 if (_markStack.overflow()) { 2109 // This should have been done already when we tried to push an 2110 // entry on to the global mark stack. But let's do it again. 2111 set_has_overflown(); 2112 } 2113 2114 assert(rp->num_q() == active_workers, "why not"); 2115 2116 rp->enqueue_discovered_references(executor); 2117 2118 rp->verify_no_references_recorded(); 2119 assert(!rp->discovery_enabled(), "Post condition"); 2120 } 2121 2122 if (has_overflown()) { 2123 // We can not trust g1_is_alive if the marking stack overflowed 2124 return; 2125 } 2126 2127 assert(_markStack.isEmpty(), "Marking should have completed"); 2128 2129 // Unload Klasses, String, Symbols, Code Cache, etc. 2130 { 2131 GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm()); 2132 2133 if (ClassUnloadingWithConcurrentMark) { 2134 bool purged_classes; 2135 2136 { 2137 GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm()); 2138 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2139 } 2140 2141 { 2142 GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm()); 2143 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2144 } 2145 } 2146 2147 if (G1StringDedup::is_enabled()) { 2148 GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm()); 2149 G1StringDedup::unlink(&g1_is_alive); 2150 } 2151 } 2152 } 2153 2154 void ConcurrentMark::swapMarkBitMaps() { 2155 CMBitMapRO* temp = _prevMarkBitMap; 2156 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2157 _nextMarkBitMap = (CMBitMap*) temp; 2158 } 2159 2160 // Closure for marking entries in SATB buffers. 2161 class CMSATBBufferClosure : public SATBBufferClosure { 2162 private: 2163 CMTask* _task; 2164 G1CollectedHeap* _g1h; 2165 2166 // This is very similar to CMTask::deal_with_reference, but with 2167 // more relaxed requirements for the argument, so this must be more 2168 // circumspect about treating the argument as an object. 2169 void do_entry(void* entry) const { 2170 _task->increment_refs_reached(); 2171 HeapRegion* hr = _g1h->heap_region_containing(entry); 2172 if (entry < hr->next_top_at_mark_start()) { 2173 // Until we get here, we don't know whether entry refers to a valid 2174 // object; it could instead have been a stale reference. 
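// Entries at or above NTAMS need no processing here: objects allocated
// there since marking started are treated as implicitly live under SATB.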
2175 oop obj = static_cast<oop>(entry);
2176 assert(obj->is_oop(true /* ignore mark word */),
2177 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2178 _task->make_reference_grey(obj, hr);
2179 }
2180 }
2181
2182 public:
2183 CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2184 : _task(task), _g1h(g1h) { }
2185
2186 virtual void do_buffer(void** buffer, size_t size) {
2187 for (size_t i = 0; i < size; ++i) {
2188 do_entry(buffer[i]);
2189 }
2190 }
2191 };
2192
2193 class G1RemarkThreadsClosure : public ThreadClosure {
2194 CMSATBBufferClosure _cm_satb_cl;
2195 G1CMOopClosure _cm_cl;
2196 MarkingCodeBlobClosure _code_cl;
2197 int _thread_parity;
2198
2199 public:
2200 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2201 _cm_satb_cl(task, g1h),
2202 _cm_cl(g1h, g1h->concurrent_mark(), task),
2203 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2204 _thread_parity(Threads::thread_claim_parity()) {}
2205
2206 void do_thread(Thread* thread) {
2207 if (thread->is_Java_thread()) {
2208 if (thread->claim_oops_do(true, _thread_parity)) {
2209 JavaThread* jt = (JavaThread*)thread;
2210
2211 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2212 // however, oops reachable from nmethods have very complex lifecycles:
2213 // * Alive if on the stack of an executing method
2214 // * Weakly reachable otherwise
2215 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2216 // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2217 jt->nmethods_do(&_code_cl);
2218
2219 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2220 }
2221 } else if (thread->is_VM_thread()) {
2222 if (thread->claim_oops_do(true, _thread_parity)) {
2223 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2224 }
2225 }
2226 }
2227 };
2228
2229 class CMRemarkTask: public AbstractGangTask {
2230 private:
2231 ConcurrentMark* _cm;
2232 public:
2233 void work(uint worker_id) {
2234 // Since all available tasks are actually started, we should
2235 // only proceed if we're supposed to be active.
2236 if (worker_id < _cm->active_tasks()) {
2237 CMTask* task = _cm->task(worker_id);
2238 task->record_start_time();
2239 {
2240 ResourceMark rm;
2241 HandleMark hm;
2242
2243 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2244 Threads::threads_do(&threads_f);
2245 }
2246
2247 do {
2248 task->do_marking_step(1000000000.0 /* something very large */,
2249 true /* do_termination */,
2250 false /* is_serial */);
2251 } while (task->has_aborted() && !_cm->has_overflown());
2252 // If we overflow, then we do not want to restart. We instead
2253 // want to abort remark and do concurrent marking again.
2254 task->record_end_time();
2255 }
2256 }
2257
2258 CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2259 AbstractGangTask("Par Remark"), _cm(cm) {
2260 _cm->terminator()->reset_for_reuse(active_workers);
2261 }
2262 };
2263
2264 void ConcurrentMark::checkpointRootsFinalWork() {
2265 ResourceMark rm;
2266 HandleMark hm;
2267 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2268
2269 GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2270
2271 g1h->ensure_parsability(false);
2272
2273 // this is remark, so we'll use up all active threads
2274 uint active_workers = g1h->workers()->active_workers();
2275 set_concurrency_and_phase(active_workers, false /* concurrent */);
2276 // Leave _parallel_marking_threads at its
2277 // value originally calculated in the ConcurrentMark
2278 // constructor and pass values of the active workers
2279 // through the gang in the task.
2280
2281 {
2282 StrongRootsScope srs(active_workers);
2283
2284 CMRemarkTask remarkTask(this, active_workers);
2285 // We will start all available threads, even if we decide that the
2286 // active_workers will be fewer. The extra ones will just bail out
2287 // immediately.
2288 g1h->workers()->run_task(&remarkTask);
2289 }
2290
2291 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2292 guarantee(has_overflown() ||
2293 satb_mq_set.completed_buffers_num() == 0,
2294 "Invariant: has_overflown = %s, num buffers = %d",
2295 BOOL_TO_STR(has_overflown()),
2296 satb_mq_set.completed_buffers_num());
2297
2298 print_stats();
2299 }
2300
2301 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2302 // Note we are overriding the read-only view of the prev map here, via
2303 // the cast.
2304 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2305 }
2306
2307 HeapRegion*
2308 ConcurrentMark::claim_region(uint worker_id) {
2309 // "checkpoint" the finger
2310 HeapWord* finger = _finger;
2311
2312 // _heap_end will not change underneath our feet; it only changes at
2313 // yield points.
2314 while (finger < _heap_end) {
2315 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2316
2317 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2318
2319 // The heap_region_containing() call above may return NULL as we always
2320 // claim until the end of the heap. In this case, just jump to the next region.
2321 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2322
2323 // Is the gap between reading the finger and doing the CAS too long?
2324 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2325 if (res == finger && curr_region != NULL) {
2326 // we succeeded
2327 HeapWord* bottom = curr_region->bottom();
2328 HeapWord* limit = curr_region->next_top_at_mark_start();
2329
2330 // notice that _finger == end cannot be guaranteed here since
2331 // someone else might have moved the finger even further
2332 assert(_finger >= end, "the finger should have moved forward");
2333
2334 if (limit > bottom) {
2335 return curr_region;
2336 } else {
2337 assert(limit == bottom,
2338 "the region limit should be at bottom");
2339 // we return NULL and the caller should try calling
2340 // claim_region() again.
2341 return NULL; 2342 } 2343 } else { 2344 assert(_finger > finger, "the finger should have moved forward"); 2345 // read it again 2346 finger = _finger; 2347 } 2348 } 2349 2350 return NULL; 2351 } 2352 2353 #ifndef PRODUCT 2354 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2355 private: 2356 G1CollectedHeap* _g1h; 2357 const char* _phase; 2358 int _info; 2359 2360 public: 2361 VerifyNoCSetOops(const char* phase, int info = -1) : 2362 _g1h(G1CollectedHeap::heap()), 2363 _phase(phase), 2364 _info(info) 2365 { } 2366 2367 void operator()(oop obj) const { 2368 guarantee(obj->is_oop(), 2369 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2370 p2i(obj), _phase, _info); 2371 guarantee(!_g1h->obj_in_cs(obj), 2372 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2373 p2i(obj), _phase, _info); 2374 } 2375 }; 2376 2377 void ConcurrentMark::verify_no_cset_oops() { 2378 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2379 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2380 return; 2381 } 2382 2383 // Verify entries on the global mark stack 2384 _markStack.iterate(VerifyNoCSetOops("Stack")); 2385 2386 // Verify entries on the task queues 2387 for (uint i = 0; i < _max_worker_id; ++i) { 2388 CMTaskQueue* queue = _task_queues->queue(i); 2389 queue->iterate(VerifyNoCSetOops("Queue", i)); 2390 } 2391 2392 // Verify the global finger 2393 HeapWord* global_finger = finger(); 2394 if (global_finger != NULL && global_finger < _heap_end) { 2395 // Since we always iterate over all regions, we might get a NULL HeapRegion 2396 // here. 2397 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2398 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2399 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2400 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2401 } 2402 2403 // Verify the task fingers 2404 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2405 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2406 CMTask* task = _tasks[i]; 2407 HeapWord* task_finger = task->finger(); 2408 if (task_finger != NULL && task_finger < _heap_end) { 2409 // See above note on the global finger verification. 2410 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2411 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2412 !task_hr->in_collection_set(), 2413 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2414 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2415 } 2416 } 2417 } 2418 #endif // PRODUCT 2419 2420 // Aggregate the counting data that was constructed concurrently 2421 // with marking. 
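// Conceptually, each worker i has been maintaining two pieces of
// per-worker counting state during marking (a sketch of what gets
// folded together below, not literal declarations):
//
//   size_t marked_bytes_array[max_regions]; // live bytes marked per region
//   BitMap task_card_bm;                    // cards spanned by marked objects
//
// The closure sums the byte counts into each HeapRegion and ORs the
// per-task card bitmaps into the global card bitmap.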
2422 class AggregateCountDataHRClosure: public HeapRegionClosure {
2423 G1CollectedHeap* _g1h;
2424 ConcurrentMark* _cm;
2425 CardTableModRefBS* _ct_bs;
2426 BitMap* _cm_card_bm;
2427 uint _max_worker_id;
2428
2429 public:
2430 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2431 BitMap* cm_card_bm,
2432 uint max_worker_id) :
2433 _g1h(g1h), _cm(g1h->concurrent_mark()),
2434 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2435 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2436
2437 bool doHeapRegion(HeapRegion* hr) {
2438 HeapWord* start = hr->bottom();
2439 HeapWord* limit = hr->next_top_at_mark_start();
2440 HeapWord* end = hr->end();
2441
2442 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2443 "Preconditions not met - "
2444 "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2445 "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2446 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2447
2448 assert(hr->next_marked_bytes() == 0, "Precondition");
2449
2450 if (start == limit) {
2451 // NTAMS of this region has not been set so nothing to do.
2452 return false;
2453 }
2454
2455 // 'start' should be in the heap.
2456 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2457 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2458 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2459
2460 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2461 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2462 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2463
2464 // If ntams is not card aligned then we bump the card bitmap index
2465 // for limit so that we get all the cards spanned by
2466 // the object ending at ntams.
2467 // Note: if this is the last region in the heap then ntams
2468 // could actually be just beyond the end of the heap;
2469 // limit_idx will then correspond to a (non-existent) card
2470 // that is also outside the heap.
2471 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2472 limit_idx += 1;
2473 }
2474
2475 assert(limit_idx <= end_idx, "or else use atomics");
2476
2477 // Aggregate the "stripe" in the count data associated with hr.
2478 uint hrm_index = hr->hrm_index();
2479 size_t marked_bytes = 0;
2480
2481 for (uint i = 0; i < _max_worker_id; i += 1) {
2482 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2483 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2484
2485 // Fetch the marked_bytes in this region for task i and
2486 // add it to the running total for this region.
2487 marked_bytes += marked_bytes_array[hrm_index];
2488
2489 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2490 // into the global card bitmap.
2491 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2492
2493 while (scan_idx < limit_idx) {
2494 assert(task_card_bm->at(scan_idx) == true, "should be");
2495 _cm_card_bm->set_bit(scan_idx);
2496 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2497
2498 // BitMap::get_next_one_offset() can handle the case when
2499 // its left_offset parameter is greater than its right_offset
2500 // parameter. It does, however, have an early exit if
2501 // left_offset == right_offset. So let's limit the value
2502 // passed in for left offset here.
2503 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2504 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2505 } 2506 } 2507 2508 // Update the marked bytes for this region. 2509 hr->add_to_marked_bytes(marked_bytes); 2510 2511 // Next heap region 2512 return false; 2513 } 2514 }; 2515 2516 class G1AggregateCountDataTask: public AbstractGangTask { 2517 protected: 2518 G1CollectedHeap* _g1h; 2519 ConcurrentMark* _cm; 2520 BitMap* _cm_card_bm; 2521 uint _max_worker_id; 2522 uint _active_workers; 2523 HeapRegionClaimer _hrclaimer; 2524 2525 public: 2526 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2527 ConcurrentMark* cm, 2528 BitMap* cm_card_bm, 2529 uint max_worker_id, 2530 uint n_workers) : 2531 AbstractGangTask("Count Aggregation"), 2532 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2533 _max_worker_id(max_worker_id), 2534 _active_workers(n_workers), 2535 _hrclaimer(_active_workers) { 2536 } 2537 2538 void work(uint worker_id) { 2539 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2540 2541 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2542 } 2543 }; 2544 2545 2546 void ConcurrentMark::aggregate_count_data() { 2547 uint n_workers = _g1h->workers()->active_workers(); 2548 2549 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2550 _max_worker_id, n_workers); 2551 2552 _g1h->workers()->run_task(&g1_par_agg_task); 2553 } 2554 2555 // Clear the per-worker arrays used to store the per-region counting data 2556 void ConcurrentMark::clear_all_count_data() { 2557 // Clear the global card bitmap - it will be filled during 2558 // liveness count aggregation (during remark) and the 2559 // final counting task. 2560 _card_bm.clear(); 2561 2562 // Clear the global region bitmap - it will be filled as part 2563 // of the final counting task. 2564 _region_bm.clear(); 2565 2566 uint max_regions = _g1h->max_regions(); 2567 assert(_max_worker_id > 0, "uninitialized"); 2568 2569 for (uint i = 0; i < _max_worker_id; i += 1) { 2570 BitMap* task_card_bm = count_card_bitmap_for(i); 2571 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2572 2573 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2574 assert(marked_bytes_array != NULL, "uninitialized"); 2575 2576 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2577 task_card_bm->clear(); 2578 } 2579 } 2580 2581 void ConcurrentMark::print_stats() { 2582 if (!log_is_enabled(Debug, gc, stats)) { 2583 return; 2584 } 2585 log_debug(gc, stats)("---------------------------------------------------------------------"); 2586 for (size_t i = 0; i < _active_tasks; ++i) { 2587 _tasks[i]->print_stats(); 2588 log_debug(gc, stats)("---------------------------------------------------------------------"); 2589 } 2590 } 2591 2592 // abandon current marking iteration due to a Full GC 2593 void ConcurrentMark::abort() { 2594 if (!cmThread()->during_cycle() || _has_aborted) { 2595 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2596 return; 2597 } 2598 2599 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2600 // concurrent bitmap clearing. 2601 _nextMarkBitMap->clearAll(); 2602 2603 // Note we cannot clear the previous marking bitmap here 2604 // since VerifyDuringGC verifies the objects marked during 2605 // a full GC against the previous bitmap. 
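// ("Prev" reflects the last completed marking and must remain intact for
// that verification; only the in-progress "next" bitmap is safe to wipe.)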
2606 2607 // Clear the liveness counting data 2608 clear_all_count_data(); 2609 // Empty mark stack 2610 reset_marking_state(); 2611 for (uint i = 0; i < _max_worker_id; ++i) { 2612 _tasks[i]->clear_region_fields(); 2613 } 2614 _first_overflow_barrier_sync.abort(); 2615 _second_overflow_barrier_sync.abort(); 2616 _has_aborted = true; 2617 2618 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2619 satb_mq_set.abandon_partial_marking(); 2620 // This can be called either during or outside marking, we'll read 2621 // the expected_active value from the SATB queue set. 2622 satb_mq_set.set_active_all_threads( 2623 false, /* new active value */ 2624 satb_mq_set.is_active() /* expected_active */); 2625 2626 _g1h->trace_heap_after_concurrent_cycle(); 2627 2628 // Close any open concurrent phase timing 2629 register_concurrent_phase_end(); 2630 2631 _g1h->register_concurrent_cycle_end(); 2632 } 2633 2634 static void print_ms_time_info(const char* prefix, const char* name, 2635 NumberSeq& ns) { 2636 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2637 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2638 if (ns.num() > 0) { 2639 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2640 prefix, ns.sd(), ns.maximum()); 2641 } 2642 } 2643 2644 void ConcurrentMark::print_summary_info() { 2645 LogHandle(gc, marking) log; 2646 if (!log.is_trace()) { 2647 return; 2648 } 2649 2650 log.trace(" Concurrent marking:"); 2651 print_ms_time_info(" ", "init marks", _init_times); 2652 print_ms_time_info(" ", "remarks", _remark_times); 2653 { 2654 print_ms_time_info(" ", "final marks", _remark_mark_times); 2655 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2656 2657 } 2658 print_ms_time_info(" ", "cleanups", _cleanup_times); 2659 log.trace(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2660 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2661 if (G1ScrubRemSets) { 2662 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2663 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2664 } 2665 log.trace(" Total stop_world time = %8.2f s.", 2666 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2667 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2668 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2669 } 2670 2671 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2672 _parallel_workers->print_worker_threads_on(st); 2673 } 2674 2675 void ConcurrentMark::print_on_error(outputStream* st) const { 2676 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2677 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2678 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2679 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2680 } 2681 2682 // We take a break if someone is trying to stop the world. 
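// Returns true only if the thread actually yielded (and so may have been
// suspended across a safepoint); callers typically re-check has_aborted()
// afterwards, as ClearBitmapHRClosure does.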
2683 bool ConcurrentMark::do_yield_check(uint worker_id) {
2684 if (SuspendibleThreadSet::should_yield()) {
2685 if (worker_id == 0) {
2686 _g1h->g1_policy()->record_concurrent_pause();
2687 }
2688 SuspendibleThreadSet::yield();
2689 return true;
2690 } else {
2691 return false;
2692 }
2693 }
2694
2695 // Closure for iteration over bitmaps
2696 class CMBitMapClosure : public BitMapClosure {
2697 private:
2698 // the bitmap that is being iterated over
2699 CMBitMap* _nextMarkBitMap;
2700 ConcurrentMark* _cm;
2701 CMTask* _task;
2702
2703 public:
2704 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2705 _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
2706
2707 bool do_bit(size_t offset) {
2708 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2709 assert(_nextMarkBitMap->isMarked(addr), "invariant");
2710 assert( addr < _cm->finger(), "invariant");
2711 assert(addr >= _task->finger(), "invariant");
2712
2713 // We move the task's local finger along.
2714 _task->move_finger_to(addr);
2715
2716 _task->scan_object(oop(addr));
2717 // we only partially drain the local queue and global stack
2718 _task->drain_local_queue(true);
2719 _task->drain_global_stack(true);
2720
2721 // if the has_aborted flag has been raised, we need to bail out of
2722 // the iteration
2723 return !_task->has_aborted();
2724 }
2725 };
2726
2727 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2728 ReferenceProcessor* result = NULL;
2729 if (G1UseConcMarkReferenceProcessing) {
2730 result = g1h->ref_processor_cm();
2731 assert(result != NULL, "should not be NULL");
2732 }
2733 return result;
2734 }
2735
2736 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2737 ConcurrentMark* cm,
2738 CMTask* task)
2739 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2740 _g1h(g1h), _cm(cm), _task(task)
2741 { }
2742
2743 void CMTask::setup_for_region(HeapRegion* hr) {
2744 assert(hr != NULL,
2745 "claim_region() should have filtered out NULL regions");
2746 _curr_region = hr;
2747 _finger = hr->bottom();
2748 update_region_limit();
2749 }
2750
2751 void CMTask::update_region_limit() {
2752 HeapRegion* hr = _curr_region;
2753 HeapWord* bottom = hr->bottom();
2754 HeapWord* limit = hr->next_top_at_mark_start();
2755
2756 if (limit == bottom) {
2757 // The region was collected underneath our feet.
2758 // We set the finger to bottom to ensure that the bitmap
2759 // iteration that will follow this will not do anything.
2760 // (this is not a condition that holds when we set the region up,
2761 // as the region is not supposed to be empty in the first place)
2762 _finger = bottom;
2763 } else if (limit >= _region_limit) {
2764 assert(limit >= _finger, "peace of mind");
2765 } else {
2766 assert(limit < _region_limit, "only way to get here");
2767 // This can happen under some pretty unusual circumstances. An
2768 // evacuation pause empties the region underneath our feet (NTAMS
2769 // at bottom). We then do some allocation in the region (NTAMS
2770 // stays at bottom), followed by the region being used as a GC
2771 // alloc region (NTAMS will move to top() and the objects
2772 // originally below it will be grayed). All objects now marked in
2773 // the region are explicitly grayed, if below the global finger,
2774 // and in fact we do not need to scan anything else. So, we simply
2775 // set _finger to be limit to ensure that the bitmap iteration
2776 // doesn't do anything.
2777 _finger = limit;
2778 }
2779
2780 _region_limit = limit;
2781 }
2782
2783 void CMTask::giveup_current_region() {
2784 assert(_curr_region != NULL, "invariant");
2785 clear_region_fields();
2786 }
2787
2788 void CMTask::clear_region_fields() {
2789 // Values for these three fields that indicate that we're not
2790 // holding on to a region.
2791 _curr_region = NULL;
2792 _finger = NULL;
2793 _region_limit = NULL;
2794 }
2795
2796 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2797 if (cm_oop_closure == NULL) {
2798 assert(_cm_oop_closure != NULL, "invariant");
2799 } else {
2800 assert(_cm_oop_closure == NULL, "invariant");
2801 }
2802 _cm_oop_closure = cm_oop_closure;
2803 }
2804
2805 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2806 guarantee(nextMarkBitMap != NULL, "invariant");
2807 _nextMarkBitMap = nextMarkBitMap;
2808 clear_region_fields();
2809
2810 _calls = 0;
2811 _elapsed_time_ms = 0.0;
2812 _termination_time_ms = 0.0;
2813 _termination_start_time_ms = 0.0;
2814 }
2815
2816 bool CMTask::should_exit_termination() {
2817 regular_clock_call();
2818 // This is called when we are in the termination protocol. We should
2819 // quit if, for some reason, this task wants to abort or the global
2820 // stack is not empty (this means that we can get work from it).
2821 return !_cm->mark_stack_empty() || has_aborted();
2822 }
2823
2824 void CMTask::reached_limit() {
2825 assert(_words_scanned >= _words_scanned_limit ||
2826 _refs_reached >= _refs_reached_limit,
2827 "shouldn't have been called otherwise");
2828 regular_clock_call();
2829 }
2830
2831 void CMTask::regular_clock_call() {
2832 if (has_aborted()) return;
2833
2834 // First, we need to recalculate the words scanned and refs reached
2835 // limits for the next clock call.
2836 recalculate_limits();
2837
2838 // During the regular clock call we do the following:
2839
2840 // (1) If an overflow has been flagged, then we abort.
2841 if (_cm->has_overflown()) {
2842 set_has_aborted();
2843 return;
2844 }
2845
2846 // If we are not concurrent (i.e. we're doing remark) we don't need
2847 // to check anything else. The other steps are only needed during
2848 // the concurrent marking phase.
2849 if (!concurrent()) return;
2850
2851 // (2) If marking has been aborted for Full GC, then we also abort.
2852 if (_cm->has_aborted()) {
2853 set_has_aborted();
2854 return;
2855 }
2856
2857 double curr_time_ms = os::elapsedVTime() * 1000.0;
2858
2859 // (3) We check whether we should yield. If we have to, then we abort.
2860 if (SuspendibleThreadSet::should_yield()) {
2861 // We should yield. To do this we abort the task. The caller is
2862 // responsible for yielding.
2863 set_has_aborted();
2864 return;
2865 }
2866
2867 // (4) We check whether we've reached our time quota. If we have,
2868 // then we abort.
2869 double elapsed_time_ms = curr_time_ms - _start_time_ms;
2870 if (elapsed_time_ms > _time_target_ms) {
2871 set_has_aborted();
2872 _has_timed_out = true;
2873 return;
2874 }
2875
2876 // (5) Finally, we check whether there are enough completed SATB
2877 // buffers available for processing. If there are, we abort.
2878 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2879 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2880 // We do need to process SATB buffers, so we'll abort and restart
2881 // the marking task to do so
2882 set_has_aborted();
2883 return;
2884 }
2885 }
2886
2887 void CMTask::recalculate_limits() {
2888 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2889 _words_scanned_limit = _real_words_scanned_limit;
2890
2891 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2892 _refs_reached_limit = _real_refs_reached_limit;
2893 }
2894
2895 void CMTask::decrease_limits() {
2896 // This is called when we believe that we're going to do an infrequent
2897 // operation which will increase the per-byte scanned cost (i.e. move
2898 // entries to/from the global stack). It basically tries to decrease the
2899 // scanning limit so that the clock is called earlier.
2900
2901 _words_scanned_limit = _real_words_scanned_limit -
2902 3 * words_scanned_period / 4;
2903 _refs_reached_limit = _real_refs_reached_limit -
2904 3 * refs_reached_period / 4;
2905 }
2906
2907 void CMTask::move_entries_to_global_stack() {
2908 // local array where we'll store the entries that will be popped
2909 // from the local queue
2910 oop buffer[global_stack_transfer_size];
2911
2912 int n = 0;
2913 oop obj;
2914 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2915 buffer[n] = obj;
2916 ++n;
2917 }
2918
2919 if (n > 0) {
2920 // we popped at least one entry from the local queue
2921
2922 if (!_cm->mark_stack_push(buffer, n)) {
2923 set_has_aborted();
2924 }
2925 }
2926
2927 // this operation was quite expensive, so decrease the limits
2928 decrease_limits();
2929 }
2930
2931 void CMTask::get_entries_from_global_stack() {
2932 // local array where we'll store the entries that will be popped
2933 // from the global stack.
2934 oop buffer[global_stack_transfer_size];
2935 int n;
2936 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2937 assert(n <= global_stack_transfer_size,
2938 "we should not pop more than the given limit");
2939 if (n > 0) {
2940 // yes, we did actually pop at least one entry
2941 for (int i = 0; i < n; ++i) {
2942 bool success = _task_queue->push(buffer[i]);
2943 // We only call this when the local queue is empty or under a
2944 // given target limit. So, we do not expect this push to fail.
2945 assert(success, "invariant");
2946 }
2947 }
2948
2949 // this operation was quite expensive, so decrease the limits
2950 decrease_limits();
2951 }
2952
2953 void CMTask::drain_local_queue(bool partially) {
2954 if (has_aborted()) return;
2955
2956 // Decide what the target size is, depending on whether we're going to
2957 // drain it partially (so that other tasks can steal if they run out
2958 // of things to do) or totally (at the very end).
2959 size_t target_size;
2960 if (partially) {
2961 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2962 } else {
2963 target_size = 0;
2964 }
2965
2966 if (_task_queue->size() > target_size) {
2967 oop obj;
2968 bool ret = _task_queue->pop_local(obj);
2969 while (ret) {
2970 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
2971 assert(!_g1h->is_on_master_free_list(
2972 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2973
2974 scan_object(obj);
2975
2976 if (_task_queue->size() <= target_size || has_aborted()) {
2977 ret = false;
2978 } else {
2979 ret = _task_queue->pop_local(obj);
2980 }
2981 }
2982 }
2983 }
2984
2985 void CMTask::drain_global_stack(bool partially) {
2986 if (has_aborted()) return;
2987
2988 // We have a policy to drain the local queue before we attempt to
2989 // drain the global stack.
2990 assert(partially || _task_queue->size() == 0, "invariant");
2991
2992 // Decide what the target size is, depending on whether we're going to
2993 // drain it partially (so that other tasks can steal if they run out
2994 // of things to do) or totally (at the very end). Notice that,
2995 // because we move entries from the global stack in chunks or
2996 // because another task might be doing the same, we might in fact
2997 // drop below the target. But, this is not a problem.
2998 size_t target_size;
2999 if (partially) {
3000 target_size = _cm->partial_mark_stack_size_target();
3001 } else {
3002 target_size = 0;
3003 }
3004
3005 if (_cm->mark_stack_size() > target_size) {
3006 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3007 get_entries_from_global_stack();
3008 drain_local_queue(partially);
3009 }
3010 }
3011 }
3012
3013 // SATB Queue has several assumptions on whether to call the par or
3014 // non-par versions of the methods. This is why some of the code is
3015 // replicated. We should really get rid of the single-threaded version
3016 // of the code to simplify things.
3017 void CMTask::drain_satb_buffers() {
3018 if (has_aborted()) return;
3019
3020 // We set this so that the regular clock knows that we're in the
3021 // middle of draining buffers and doesn't set the abort flag when it
3022 // notices that SATB buffers are available for draining. It'd be
3023 // very counterproductive if it did that. :-)
3024 _draining_satb_buffers = true;
3025
3026 CMSATBBufferClosure satb_cl(this, _g1h);
3027 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3028
3029 // This keeps claiming and applying the closure to completed buffers
3030 // until we run out of buffers or we need to abort.
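// (apply_closure_to_completed_buffer() returns false once no completed
// buffer is left to claim, which terminates the loop below.)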
3031 while (!has_aborted() && 3032 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 3033 regular_clock_call(); 3034 } 3035 3036 _draining_satb_buffers = false; 3037 3038 assert(has_aborted() || 3039 concurrent() || 3040 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3041 3042 // again, this was a potentially expensive operation, decrease the 3043 // limits to get the regular clock call early 3044 decrease_limits(); 3045 } 3046 3047 void CMTask::print_stats() { 3048 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 3049 _worker_id, _calls); 3050 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3051 _elapsed_time_ms, _termination_time_ms); 3052 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3053 _step_times_ms.num(), _step_times_ms.avg(), 3054 _step_times_ms.sd()); 3055 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 3056 _step_times_ms.maximum(), _step_times_ms.sum()); 3057 } 3058 3059 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) { 3060 return _task_queues->steal(worker_id, hash_seed, obj); 3061 } 3062 3063 /***************************************************************************** 3064 3065 The do_marking_step(time_target_ms, ...) method is the building 3066 block of the parallel marking framework. It can be called in parallel 3067 with other invocations of do_marking_step() on different tasks 3068 (but only one per task, obviously) and concurrently with the 3069 mutator threads, or during remark, hence it eliminates the need 3070 for two versions of the code. When called during remark, it will 3071 pick up from where the task left off during the concurrent marking 3072 phase. Interestingly, tasks are also claimable during evacuation 3073 pauses too, since do_marking_step() ensures that it aborts before 3074 it needs to yield. 3075 3076 The data structures that it uses to do marking work are the 3077 following: 3078 3079 (1) Marking Bitmap. If there are gray objects that appear only 3080 on the bitmap (this happens either when dealing with an overflow 3081 or when the initial marking phase has simply marked the roots 3082 and didn't push them on the stack), then tasks claim heap 3083 regions whose bitmap they then scan to find gray objects. A 3084 global finger indicates where the end of the last claimed region 3085 is. A local finger indicates how far into the region a task has 3086 scanned. The two fingers are used to determine how to gray an 3087 object (i.e. whether simply marking it is OK, as it will be 3088 visited by a task in the future, or whether it needs to be also 3089 pushed on a stack). 3090 3091 (2) Local Queue. The local queue of the task which is accessed 3092 reasonably efficiently by the task. Other tasks can steal from 3093 it when they run out of work. Throughout the marking phase, a 3094 task attempts to keep its local queue short but not totally 3095 empty, so that entries are available for stealing by other 3096 tasks. Only when there is no more work, a task will totally 3097 drain its local queue. 3098 3099 (3) Global Mark Stack. This handles local queue overflow. During 3100 marking only sets of entries are moved between it and the local 3101 queues, as access to it requires a mutex and more fine-grain 3102 interaction with it which might cause contention. If it 3103 overflows, then the marking phase should restart and iterate 3104 over the bitmap to identify gray objects. 
Throughout the marking
3105 phase, tasks attempt to keep the global mark stack at a small
3106 length but not totally empty, so that entries are available for
3107 popping by other tasks. Only when there is no more work, tasks
3108 will totally drain the global mark stack.
3109
3110 (4) SATB Buffer Queue. This is where completed SATB buffers are
3111 made available. Buffers are regularly removed from this queue
3112 and scanned for roots, so that the queue doesn't get too
3113 long. During remark, all completed buffers are processed, as
3114 well as the filled in parts of any uncompleted buffers.
3115
3116 The do_marking_step() method tries to abort when the time target
3117 has been reached. There are a few other cases when the
3118 do_marking_step() method also aborts:
3119
3120 (1) When the marking phase has been aborted (after a Full GC).
3121
3122 (2) When a global overflow (on the global stack) has been
3123 triggered. Before the task aborts, it will actually sync up with
3124 the other tasks to ensure that all the marking data structures
3125 (local queues, stacks, fingers etc.) are re-initialized so that
3126 when do_marking_step() completes, the marking phase can
3127 immediately restart.
3128
3129 (3) When enough completed SATB buffers are available. The
3130 do_marking_step() method only tries to drain SATB buffers right
3131 at the beginning. So, if enough buffers are available, the
3132 marking step aborts and the SATB buffers are processed at
3133 the beginning of the next invocation.
3134
3135 (4) To yield. When we have to yield, we abort and yield
3136 right at the end of do_marking_step(). This saves us from a lot
3137 of hassle as, by yielding, we might allow a Full GC. If this
3138 happens then objects will be compacted underneath our feet, the
3139 heap might shrink, etc. We save checking for this by just
3140 aborting and doing the yield right at the end.
3141
3142 From the above it follows that the do_marking_step() method should
3143 be called in a loop (or, otherwise, regularly) until it completes.
3144
3145 If a marking step completes without its has_aborted() flag being
3146 true, it means it has completed the current marking phase (and
3147 also all other marking tasks have done so and have all synced up).
3148
3149 A method called regular_clock_call() is invoked "regularly" (in
3150 sub-ms intervals) throughout marking. It is this clock method that
3151 checks all the abort conditions which were mentioned above and
3152 decides when the task should abort. A work-based scheme is used to
3153 trigger this clock method: when the number of object words the
3154 marking phase has scanned or the number of references the marking
3155 phase has visited reach a given limit. Additional invocations of
3156 the clock method have been planted in a few other strategic places
3157 too. The initial reason for the clock method was to avoid calling
3158 vtime too regularly, as it is quite expensive. So, once it was in
3159 place, it was natural to piggy-back all the other conditions on it
3160 too and not constantly check them throughout the code.
3161
3162 If do_termination is true then do_marking_step will enter its
3163 termination protocol.
3164
3165 The value of is_serial must be true when do_marking_step is being
3166 called serially (i.e. by the VMThread) and do_marking_step should
3167 skip any synchronization in the termination and overflow code.
3168 Examples include the serial remark code and the serial reference
3169 processing closures.
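
   Whichever mode is used, the canonical calling pattern is the loop
   described above (a sketch; compare CMRemarkTask::work() and
   G1CMDrainMarkingStackClosure::do_void()):

     do {
       task->do_marking_step(target_ms, do_termination, is_serial);
     } while (task->has_aborted() && !cm->has_overflown());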

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (on the global stack) has been
    triggered. Before the task aborts, it will actually sync up with
    the other tasks to ensure that all the marking data structures
    (local queues, stacks, fingers etc.) are re-initialized so that
    when do_marking_step() completes, the marking phase can
    immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and yield
    right at the end of do_marking_step(). This saves us from a lot
    of hassle as, by yielding, we might allow a Full GC. If this
    happens then objects will be compacted underneath our feet, the
    heap might shrink, etc. We save checking for this by just
    aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-millisecond intervals) throughout marking. It is this clock
    method that checks all the abort conditions which were mentioned
    above and decides when the task should abort. A work-based scheme
    is used to trigger this clock method: when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reaches a given limit. Additional
    calls to the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid calling vtime too regularly, as it is quite expensive. So,
    once it was in place, it was natural to piggy-back all the other
    conditions on it too and not constantly check them throughout
    the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/
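
// A minimal sketch of the driver loop described above, i.e. how a caller
// is expected to invoke do_marking_step() until it completes. This is an
// illustration only, not code from this file; the step duration flag and
// the yield handling of the real callers may differ.
//
//   do {
//     task->do_marking_step(G1ConcMarkStepDurationMillis,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // A real caller would honor any pending yield request here.
//   } while (!task->has_aborted());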

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // Set up the variables that are used in the work-based scheme to
  // call the regular clock method.
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // Clear all flags.
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack.
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure.
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object.
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one.
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // Let's allow task 0 to do this.
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task. The
      // caller will restart it and we can hopefully find more things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context.
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're during the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
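
// For example (illustrative only), string-literal concatenation makes a
// header format such as
//
//   G1PPRL_LINE_PREFIX G1PPRL_TYPE_H_FORMAT G1PPRL_BYTE_H_FORMAT
//
// expand to "###   %4s  %9s", so header and value rows line up as long as
// each *_H_* macro stays the same width as its value counterpart.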

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

// Takes as a parameter a pointer to one of the _hum_* fields, deduces
// the corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field is less
// than the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series from
// the _hum_* fields and updates those accordingly. It assumes that
// the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}
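
// A worked example of the scheme above (the numbers are illustrative):
// with GrainBytes == 1M and a humongous series whose "starts humongous"
// region seeds _hum_used_bytes with 2.5M, the three regions of the series
// each report MIN2(1M, remaining) in turn: 1M, 1M and 0.5M, draining the
// field back to zero by the last "continues humongous" region.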

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usages to remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          perc(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          perc(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          perc(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}
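
// A minimal sketch of how this closure is driven: it is stack-allocated
// with a phase name and iterated over all heap regions, after which its
// destructor emits the "### SUMMARY" footer. The phase name below is
// illustrative only.
//
//   G1PrintRegionLivenessInfoClosure cl("Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);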