/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
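  // (Each bitmap bit covers (1 << _shifter) heap words, so the address
  // must be aligned to that granularity before it can be converted
  // into a bit offset.)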
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;  // The closure may yield during iteration. If yielded, abort the iteration.
public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer _hrclaimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
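  // Claim the slots first, then fill them in; concurrent pushers are
  // serialized by the ParGCRareEvent_lock acquired above.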
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
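  // If the scan was aborted, some survivors may never have been claimed,
  // so the "all survivors claimed" assertion below only holds in the
  // non-aborted case.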
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor          = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
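    // scale_parallel_threads() (defined above) maps n parallel GC
    // threads to MAX2((n + 2) / 4, 1) concurrent marking threads.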
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
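    // Only values supplied on the command line are checked here; the
    // check against MarkStackSizeMax is applied whether that flag is
    // at its default or was itself set on the command line.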
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
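  // Each task is re-pointed at the next marking bitmap, which is the
  // bitmap the upcoming cycle will mark into.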
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
898 * 899 * Note, however, that this code is also used during remark and in 900 * this case we should not attempt to leave / enter the STS, otherwise 901 * we'll either hit an assert (debug / fastdebug) or deadlock 902 * (product). So we should only leave / enter the STS if we are 903 * operating concurrently. 904 * 905 * Because the thread that does the sync barrier has left the STS, it 906 * is possible to be suspended for a Full GC or an evacuation pause 907 * could occur. This is actually safe, since the entering the sync 908 * barrier is one of the last things do_marking_step() does, and it 909 * doesn't manipulate any data structures afterwards. 910 */ 911 912 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 913 bool barrier_aborted; 914 915 if (verbose_low()) { 916 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id); 917 } 918 919 { 920 SuspendibleThreadSetLeaver sts_leave(concurrent()); 921 barrier_aborted = !_first_overflow_barrier_sync.enter(); 922 } 923 924 // at this point everyone should have synced up and not be doing any 925 // more work 926 927 if (verbose_low()) { 928 if (barrier_aborted) { 929 gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id); 930 } else { 931 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id); 932 } 933 } 934 935 if (barrier_aborted) { 936 // If the barrier aborted we ignore the overflow condition and 937 // just abort the whole marking phase as quickly as possible. 938 return; 939 } 940 941 // If we're executing the concurrent phase of marking, reset the marking 942 // state; otherwise the marking state is reset after reference processing, 943 // during the remark pause. 944 // If we reset here as a result of an overflow during the remark we will 945 // see assertion failures from any subsequent set_concurrency_and_phase() 946 // calls. 947 if (concurrent()) { 948 // let the task associated with with worker 0 do this 949 if (worker_id == 0) { 950 // task 0 is responsible for clearing the global data structures 951 // We should be here because of an overflow. During STW we should 952 // not clear the overflow flag since we rely on it being true when 953 // we exit this method to abort the pause and restart concurrent 954 // marking. 
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp();
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  bool barrier_aborted;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_second_overflow_barrier_sync.enter();
  }

  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent phase.
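// The result is capped by max_parallel_marking_threads(); an adaptive
// value is only computed when UseDynamicNumberOfGCThreads is enabled
// and ConcGCThreads has not been set explicitly (or
// ForceDynamicNumberOfGCThreads overrides it).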
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public StackObj {
  GCTraceTimeImpl _gc_trace_time;
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

public:
  G1CMTraceTime(const char* title, bool doit)
    : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
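    // Only [bottom, ntams) is walked below; anything at or above ntams
    // was allocated since marking started and is handled by the
    // allocated-since-marking code further down.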
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm; // Region BM to be verified
  BitMap* _card_bm;   // Card BM to be verified
  bool _verbose;      // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrm_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrm_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;
  bool _verbose;

  HeapRegionClaimer _hrclaimer;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.
1607 1608 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1609 public: 1610 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1611 BitMap* region_bm, 1612 BitMap* card_bm) : 1613 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1614 1615 bool doHeapRegion(HeapRegion* hr) { 1616 HeapWord* ntams = hr->next_top_at_mark_start(); 1617 HeapWord* top = hr->top(); 1618 1619 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1620 1621 // Mark the allocated-since-marking portion... 1622 if (ntams < top) { 1623 // This definitely means the region has live objects. 1624 set_bit_for_region(hr); 1625 1626 // Now set the bits in the card bitmap for [ntams, top) 1627 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1628 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1629 1630 // Note: if we're looking at the last region in heap - top 1631 // could be actually just beyond the end of the heap; end_idx 1632 // will then correspond to a (non-existent) card that is also 1633 // just beyond the heap. 1634 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1635 // end of object is not card aligned - increment to cover 1636 // all the cards spanned by the object 1637 end_idx += 1; 1638 } 1639 1640 assert(end_idx <= _card_bm->size(), 1641 "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1642 end_idx, _card_bm->size()); 1643 assert(start_idx < _card_bm->size(), 1644 "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1645 start_idx, _card_bm->size()); 1646 1647 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1648 } 1649 1650 // Set the bit for the region if it contains live data 1651 if (hr->next_marked_bytes() > 0) { 1652 set_bit_for_region(hr); 1653 } 1654 1655 return false; 1656 } 1657 }; 1658 1659 class G1ParFinalCountTask: public AbstractGangTask { 1660 protected: 1661 G1CollectedHeap* _g1h; 1662 ConcurrentMark* _cm; 1663 BitMap* _actual_region_bm; 1664 BitMap* _actual_card_bm; 1665 1666 uint _n_workers; 1667 HeapRegionClaimer _hrclaimer; 1668 1669 public: 1670 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1671 : AbstractGangTask("G1 final counting"), 1672 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1673 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1674 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1675 } 1676 1677 void work(uint worker_id) { 1678 assert(worker_id < _n_workers, "invariant"); 1679 1680 FinalCountDataUpdateClosure final_update_cl(_g1h, 1681 _actual_region_bm, 1682 _actual_card_bm); 1683 1684 _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer); 1685 } 1686 }; 1687 1688 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1689 G1CollectedHeap* _g1; 1690 size_t _freed_bytes; 1691 FreeRegionList* _local_cleanup_list; 1692 HeapRegionSetCount _old_regions_removed; 1693 HeapRegionSetCount _humongous_regions_removed; 1694 HRRSCleanupTask* _hrrs_cleanup_task; 1695 1696 public: 1697 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1698 FreeRegionList* local_cleanup_list, 1699 HRRSCleanupTask* hrrs_cleanup_task) : 1700 _g1(g1), 1701 _freed_bytes(0), 1702 _local_cleanup_list(local_cleanup_list), 1703 _old_regions_removed(), 1704 _humongous_regions_removed(), 1705 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1706 1707 size_t freed_bytes() { return _freed_bytes; } 1708 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1709 const HeapRegionSetCount& 
humongous_regions_removed() { return _humongous_regions_removed; }
1710
1711   bool doHeapRegion(HeapRegion *hr) {
1712     if (hr->is_archive()) {
1713       return false;
1714     }
1717     _g1->reset_gc_time_stamps(hr);
1718     hr->note_end_of_marking();
1719
1720     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1721       _freed_bytes += hr->used();
1722       hr->set_containing_set(NULL);
1723       if (hr->is_humongous()) {
1724         _humongous_regions_removed.increment(1u, hr->capacity());
1725         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1726       } else {
1727         _old_regions_removed.increment(1u, hr->capacity());
1728         _g1->free_region(hr, _local_cleanup_list, true);
1729       }
1730     } else {
1731       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1732     }
1733
1734     return false;
1735   }
1736 };
1737
1738 class G1ParNoteEndTask: public AbstractGangTask {
1739   friend class G1NoteEndOfConcMarkClosure;
1740
1741 protected:
1742   G1CollectedHeap* _g1h;
1743   FreeRegionList* _cleanup_list;
1744   HeapRegionClaimer _hrclaimer;
1745
1746 public:
1747   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1748       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1749   }
1750
1751   void work(uint worker_id) {
1752     FreeRegionList local_cleanup_list("Local Cleanup List");
1753     HRRSCleanupTask hrrs_cleanup_task;
1754     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1755                                            &hrrs_cleanup_task);
1756     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1757     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1758
1759     // Now update the lists
1760     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1761     {
1762       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1763       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1764
1765       // If we iterated over the global cleanup list at the end of
1766       // cleanup to do this printing, we could not guarantee that we only
1767       // generate output for the newly-reclaimed regions (the list
1768       // might not be empty at the beginning of cleanup; we might
1769       // still be working on its previous contents). So we do the
1770       // printing here, before we append the new regions to the global
1771       // cleanup list.
1772 1773 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1774 if (hr_printer->is_active()) { 1775 FreeRegionListIterator iter(&local_cleanup_list); 1776 while (iter.more_available()) { 1777 HeapRegion* hr = iter.get_next(); 1778 hr_printer->cleanup(hr); 1779 } 1780 } 1781 1782 _cleanup_list->add_ordered(&local_cleanup_list); 1783 assert(local_cleanup_list.is_empty(), "post-condition"); 1784 1785 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1786 } 1787 } 1788 }; 1789 1790 class G1ParScrubRemSetTask: public AbstractGangTask { 1791 protected: 1792 G1RemSet* _g1rs; 1793 BitMap* _region_bm; 1794 BitMap* _card_bm; 1795 HeapRegionClaimer _hrclaimer; 1796 1797 public: 1798 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1799 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1800 } 1801 1802 void work(uint worker_id) { 1803 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1804 } 1805 1806 }; 1807 1808 void ConcurrentMark::cleanup() { 1809 // world is stopped at this checkpoint 1810 assert(SafepointSynchronize::is_at_safepoint(), 1811 "world should be stopped"); 1812 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1813 1814 // If a full collection has happened, we shouldn't do this. 1815 if (has_aborted()) { 1816 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1817 return; 1818 } 1819 1820 g1h->verify_region_sets_optional(); 1821 1822 if (VerifyDuringGC) { 1823 HandleMark hm; // handle scope 1824 g1h->prepare_for_verify(); 1825 Universe::verify(VerifyOption_G1UsePrevMarking, 1826 " VerifyDuringGC:(before)"); 1827 } 1828 g1h->check_bitmaps("Cleanup Start"); 1829 1830 G1CollectorPolicy* g1p = g1h->g1_policy(); 1831 g1p->record_concurrent_mark_cleanup_start(); 1832 1833 double start = os::elapsedTime(); 1834 1835 HeapRegionRemSet::reset_for_cleanup_tasks(); 1836 1837 // Do counting once more with the world stopped for good measure. 1838 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1839 1840 g1h->workers()->run_task(&g1_par_count_task); 1841 1842 if (VerifyDuringGC) { 1843 // Verify that the counting data accumulated during marking matches 1844 // that calculated by walking the marking bitmap. 1845 1846 // Bitmaps to hold expected values 1847 BitMap expected_region_bm(_region_bm.size(), true); 1848 BitMap expected_card_bm(_card_bm.size(), true); 1849 1850 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1851 &_region_bm, 1852 &_card_bm, 1853 &expected_region_bm, 1854 &expected_card_bm); 1855 1856 g1h->workers()->run_task(&g1_par_verify_task); 1857 1858 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1859 } 1860 1861 size_t start_used_bytes = g1h->used(); 1862 g1h->collector_state()->set_mark_in_progress(false); 1863 1864 double count_end = os::elapsedTime(); 1865 double this_final_counting_time = (count_end - start); 1866 _total_counting_time += this_final_counting_time; 1867 1868 if (G1PrintRegionLivenessInfo) { 1869 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1870 _g1h->heap_region_iterate(&cl); 1871 } 1872 1873 // Install newly created mark bitMap as "prev". 1874 swapMarkBitMaps(); 1875 1876 g1h->reset_gc_time_stamp(); 1877 1878 uint n_workers = _g1h->workers()->active_workers(); 1879 1880 // Note end of marking in all heap regions. 
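// (G1ParNoteEndTask runs G1NoteEndOfConcMarkClosure, defined above, over
// all regions in parallel: regions found to contain no live data are
// freed into per-worker local cleanup lists and then appended to
// _cleanup_list, to be handed back to the heap concurrently in
// completeCleanup() below.)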
1881   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1882   g1h->workers()->run_task(&g1_par_note_end_task);
1883   g1h->check_gc_time_stamps();
1884
1885   if (!cleanup_list_is_empty()) {
1886     // The cleanup list is not empty, so we'll have to process it
1887     // concurrently. Notify anyone else that might be wanting free
1888     // regions that there will be more free regions coming soon.
1889     g1h->set_free_regions_coming();
1890   }
1891
1892   // The remembered set scrubbing has to happen before the record_concurrent_mark_cleanup_end()
1893   // call below, since it affects the metric by which we sort the heap regions.
1894   if (G1ScrubRemSets) {
1895     double rs_scrub_start = os::elapsedTime();
1896     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1897     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1898
1899     double rs_scrub_end = os::elapsedTime();
1900     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1901     _total_rs_scrub_time += this_rs_scrub_time;
1902   }
1903
1904   // This will also free any regions totally full of garbage objects,
1905   // and sort the regions.
1906   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1907
1908   // Statistics.
1909   double end = os::elapsedTime();
1910   _cleanup_times.add((end - start) * 1000.0);
1911
1912   if (G1Log::fine()) {
1913     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1914   }
1915
1916   // Cleanup will have freed any regions completely full of garbage.
1917   // Update the soft reference policy with the new heap occupancy.
1918   Universe::update_heap_info_at_gc();
1919
1920   if (VerifyDuringGC) {
1921     HandleMark hm;  // handle scope
1922     g1h->prepare_for_verify();
1923     Universe::verify(VerifyOption_G1UsePrevMarking,
1924                      " VerifyDuringGC:(after)");
1925   }
1926
1927   g1h->check_bitmaps("Cleanup End");
1928
1929   g1h->verify_region_sets_optional();
1930
1931   // We need to make this be a "collection" so any collection pause that
1932   // races with it goes around and waits for completeCleanup to finish.
1933   g1h->increment_total_collections();
1934
1935   // Clean out dead classes and update Metaspace sizes.
1936   if (ClassUnloadingWithConcurrentMark) {
1937     ClassLoaderDataGraph::purge();
1938   }
1939   MetaspaceGC::compute_new_size();
1940
1941   // We reclaimed old regions so we should calculate the sizes to make
1942   // sure we update the old gen/space data.
1943   g1h->g1mm()->update_sizes();
1944   g1h->allocation_context_stats().update_after_mark();
1945
1946   g1h->trace_heap_after_concurrent_cycle();
1947 }
1948
1949 void ConcurrentMark::completeCleanup() {
1950   if (has_aborted()) return;
1951
1952   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1953
1954   _cleanup_list.verify_optional();
1955   FreeRegionList tmp_free_list("Tmp Free List");
1956
1957   if (G1ConcRegionFreeingVerbose) {
1958     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1959                            "cleanup list has %u entries",
1960                            _cleanup_list.length());
1961   }
1962
1963   // No one else should be accessing the _cleanup_list at this point,
1964   // so it is not necessary to take any locks.
1965   while (!_cleanup_list.is_empty()) {
1966     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1967     assert(hr != NULL, "Got NULL from a non-empty list");
1968     hr->par_clear();
1969     tmp_free_list.add_ordered(hr);
1970
1971     // Instead of adding one region at a time to the secondary_free_list,
1972     // we accumulate them in the local list and move them a few at a
1973     // time. This also cuts down on the number of notify_all() calls
1974     // we do during this process. We'll also append the local list when
1975     // _cleanup_list is empty (which means we just removed the last
1976     // region from the _cleanup_list).
1977     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1978         _cleanup_list.is_empty()) {
1979       if (G1ConcRegionFreeingVerbose) {
1980         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1981                                "appending %u entries to the secondary_free_list, "
1982                                "cleanup list still has %u entries",
1983                                tmp_free_list.length(),
1984                                _cleanup_list.length());
1985       }
1986
1987       {
1988         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1989         g1h->secondary_free_list_add(&tmp_free_list);
1990         SecondaryFreeList_lock->notify_all();
1991       }
1992 #ifndef PRODUCT
1993       if (G1StressConcRegionFreeing) {
1994         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1995           os::sleep(Thread::current(), (jlong) 1, false);
1996         }
1997       }
1998 #endif
1999     }
2000   }
2001   assert(tmp_free_list.is_empty(), "post-condition");
2002 }
2003
2004 // Supporting Object and Oop closures for reference discovery
2005 // and processing during marking.
2006
2007 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2008   HeapWord* addr = (HeapWord*)obj;
2009   return addr != NULL &&
2010          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2011 }
2012
2013 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2014 // Uses the CMTask associated with a worker thread (for serial reference
2015 // processing the CMTask for worker 0 is used) to preserve (mark) and
2016 // trace referent objects.
2017 //
2018 // Using the CMTask and embedded local queues avoids having the worker
2019 // threads operating on the global mark stack. This reduces the risk
2020 // of overflowing the stack - which we would rather avoid at this late
2021 // stage. Also using the tasks' local queues removes the potential
2022 // for the workers to interfere with each other, as they could if they
2023 // operated on the global stack.
2024
2025 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2026   ConcurrentMark* _cm;
2027   CMTask*         _task;
2028   int             _ref_counter_limit;
2029   int             _ref_counter;
2030   bool            _is_serial;
2031  public:
2032   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2033     _cm(cm), _task(task), _is_serial(is_serial),
2034     _ref_counter_limit(G1RefProcDrainInterval) {
2035     assert(_ref_counter_limit > 0, "sanity");
2036     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2037     _ref_counter = _ref_counter_limit;
2038   }
2039
2040   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2041   virtual void do_oop(      oop* p) { do_oop_work(p); }
2042
2043   template <class T> void do_oop_work(T* p) {
2044     if (!_cm->has_overflown()) {
2045       oop obj = oopDesc::load_decode_heap_oop(p);
2046       if (_cm->verbose_high()) {
2047         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2048                                "*" PTR_FORMAT " = " PTR_FORMAT,
2049                                _task->worker_id(), p2i(p), p2i((void*) obj));
2050       }
2051
2052       _task->deal_with_reference(obj);
2053       _ref_counter--;
2054
2055       if (_ref_counter == 0) {
2056         // We have dealt with _ref_counter_limit references, pushing them
2057         // and objects reachable from them on to the local stack (and
2058         // possibly the global stack). Call CMTask::do_marking_step() to
2059         // process these entries.
2060         //
2061         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2062         // there's nothing more to do (i.e. we're done with the entries that
2063         // were pushed as a result of the CMTask::deal_with_reference() calls
2064         // above) or we overflow.
2065         //
2066         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2067         // flag while there may still be some work to do. (See the comment at
2068         // the beginning of CMTask::do_marking_step() for those conditions -
2069         // one of which is reaching the specified time target.) It is only
2070         // when CMTask::do_marking_step() returns without setting the
2071         // has_aborted() flag that the marking step has completed.
2072         do {
2073           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2074           _task->do_marking_step(mark_step_duration_ms,
2075                                  false      /* do_termination */,
2076                                  _is_serial);
2077         } while (_task->has_aborted() && !_cm->has_overflown());
2078         _ref_counter = _ref_counter_limit;
2079       }
2080     } else {
2081       if (_cm->verbose_high()) {
2082         gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2083       }
2084     }
2085   }
2086 };
2087
2088 // 'Drain' oop closure used by both serial and parallel reference processing.
2089 // Uses the CMTask associated with a given worker thread (for serial
2090 // reference processing the CMTask for worker 0 is used). Calls the
2091 // do_marking_step routine, with an unbelievably large timeout value,
2092 // to drain the marking data structures of the remaining entries
2093 // added by the 'keep alive' oop closure above.
2094
2095 class G1CMDrainMarkingStackClosure: public VoidClosure {
2096   ConcurrentMark* _cm;
2097   CMTask*         _task;
2098   bool            _is_serial;
2099  public:
2100   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2101     _cm(cm), _task(task), _is_serial(is_serial) {
2102     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2103   }
2104
2105   void do_void() {
2106     do {
2107       if (_cm->verbose_high()) {
2108         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2109                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2110       }
2111
2112       // We call CMTask::do_marking_step() to completely drain the local
2113       // and global marking stacks of entries pushed by the 'keep alive'
2114       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2115       //
2116       // CMTask::do_marking_step() is called in a loop, which we'll exit
2117       // if there's nothing more to do (i.e. we've completely drained the
2118       // entries that were pushed as a result of applying the 'keep alive'
2119       // closure to the entries on the discovered ref lists) or we overflow
2120       // the global marking stack.
2121       //
2122       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2123       // flag while there may still be some work to do. (See the comment at
2124       // the beginning of CMTask::do_marking_step() for those conditions -
2125       // one of which is reaching the specified time target.) It is only
2126       // when CMTask::do_marking_step() returns without setting the
2127       // has_aborted() flag that the marking step has completed.
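      // (In short: the loop below keeps re-entering do_marking_step()
      // until it returns with has_aborted() clear - i.e. the stacks are
      // drained - or until the global mark stack has overflown, in which
      // case marking has to be restarted anyway.)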
2128 2129 _task->do_marking_step(1000000000.0 /* something very large */, 2130 true /* do_termination */, 2131 _is_serial); 2132 } while (_task->has_aborted() && !_cm->has_overflown()); 2133 } 2134 }; 2135 2136 // Implementation of AbstractRefProcTaskExecutor for parallel 2137 // reference processing at the end of G1 concurrent marking 2138 2139 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2140 private: 2141 G1CollectedHeap* _g1h; 2142 ConcurrentMark* _cm; 2143 WorkGang* _workers; 2144 uint _active_workers; 2145 2146 public: 2147 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2148 ConcurrentMark* cm, 2149 WorkGang* workers, 2150 uint n_workers) : 2151 _g1h(g1h), _cm(cm), 2152 _workers(workers), _active_workers(n_workers) { } 2153 2154 // Executes the given task using concurrent marking worker threads. 2155 virtual void execute(ProcessTask& task); 2156 virtual void execute(EnqueueTask& task); 2157 }; 2158 2159 class G1CMRefProcTaskProxy: public AbstractGangTask { 2160 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2161 ProcessTask& _proc_task; 2162 G1CollectedHeap* _g1h; 2163 ConcurrentMark* _cm; 2164 2165 public: 2166 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2167 G1CollectedHeap* g1h, 2168 ConcurrentMark* cm) : 2169 AbstractGangTask("Process reference objects in parallel"), 2170 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2171 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2172 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2173 } 2174 2175 virtual void work(uint worker_id) { 2176 ResourceMark rm; 2177 HandleMark hm; 2178 CMTask* task = _cm->task(worker_id); 2179 G1CMIsAliveClosure g1_is_alive(_g1h); 2180 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2181 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2182 2183 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2184 } 2185 }; 2186 2187 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2188 assert(_workers != NULL, "Need parallel worker threads."); 2189 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2190 2191 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2192 2193 // We need to reset the concurrency level before each 2194 // proxy task execution, so that the termination protocol 2195 // and overflow handling in CMTask::do_marking_step() knows 2196 // how many workers to wait for. 2197 _cm->set_concurrency(_active_workers); 2198 _workers->run_task(&proc_task_proxy); 2199 } 2200 2201 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2202 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2203 EnqueueTask& _enq_task; 2204 2205 public: 2206 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2207 AbstractGangTask("Enqueue reference objects in parallel"), 2208 _enq_task(enq_task) { } 2209 2210 virtual void work(uint worker_id) { 2211 _enq_task.work(worker_id); 2212 } 2213 }; 2214 2215 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2216 assert(_workers != NULL, "Need parallel worker threads."); 2217 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2218 2219 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2220 2221 // Not strictly necessary but... 2222 // 2223 // We need to reset the concurrency level before each 2224 // proxy task execution, so that the termination protocol 2225 // and overflow handling in CMTask::do_marking_step() knows 2226 // how many workers to wait for. 
2227   _cm->set_concurrency(_active_workers);
2228   _workers->run_task(&enq_task_proxy);
2229 }
2230
2231 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2232   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2233 }
2234
2235 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2236   if (has_overflown()) {
2237     // Skip processing the discovered references if we have
2238     // overflown the global marking stack. Reference objects
2239     // only get discovered once so it is OK to not
2240     // de-populate the discovered reference lists. We could have done so,
2241     // but the only benefit would be that, when marking restarts,
2242     // fewer reference objects are discovered.
2243     return;
2244   }
2245
2246   ResourceMark rm;
2247   HandleMark   hm;
2248
2249   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2250
2251   // Is alive closure.
2252   G1CMIsAliveClosure g1_is_alive(g1h);
2253
2254   // Inner scope to exclude the cleaning of the string and symbol
2255   // tables from the displayed time.
2256   {
2257     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2258
2259     ReferenceProcessor* rp = g1h->ref_processor_cm();
2260
2261     // See the comment in G1CollectedHeap::ref_processing_init()
2262     // about how reference processing currently works in G1.
2263
2264     // Set the soft reference policy
2265     rp->setup_policy(clear_all_soft_refs);
2266     assert(_markStack.isEmpty(), "mark stack should be empty");
2267
2268     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2269     // in serial reference processing. Note these closures are also
2270     // used for serially processing (by the current thread) the
2271     // JNI references during parallel reference processing.
2272     //
2273     // These closures do not need to synchronize with the worker
2274     // threads involved in parallel reference processing as these
2275     // instances are executed serially by the current thread (e.g.
2276     // reference processing is not multi-threaded and is thus
2277     // performed by the current thread instead of a gang worker).
2278     //
2279     // The gang tasks involved in parallel reference processing create
2280     // their own instances of these closures, which do their own
2281     // synchronization among themselves.
2282     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2283     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2284
2285     // We need at least one active thread. If reference processing
2286     // is not multi-threaded we use the current (VMThread) thread,
2287     // otherwise we use the work gang from the G1CollectedHeap and
2288     // we utilize all the worker threads we can.
2289     bool processing_is_mt = rp->processing_is_mt();
2290     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2291     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2292
2293     // Parallel processing task executor.
2294     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2295                                               g1h->workers(), active_workers);
2296     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2297
2298     // Set the concurrency level. The phase was already set prior to
2299     // executing the remark task.
2300     set_concurrency(active_workers);
2301
2302     // Set the degree of MT processing here. If the discovery was done MT,
2303     // the number of threads involved during discovery could differ from
2304     // the number of active workers.
This is OK as long as the discovered 2305 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2306 rp->set_active_mt_degree(active_workers); 2307 2308 // Process the weak references. 2309 const ReferenceProcessorStats& stats = 2310 rp->process_discovered_references(&g1_is_alive, 2311 &g1_keep_alive, 2312 &g1_drain_mark_stack, 2313 executor, 2314 g1h->gc_timer_cm()); 2315 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2316 2317 // The do_oop work routines of the keep_alive and drain_marking_stack 2318 // oop closures will set the has_overflown flag if we overflow the 2319 // global marking stack. 2320 2321 assert(_markStack.overflow() || _markStack.isEmpty(), 2322 "mark stack should be empty (unless it overflowed)"); 2323 2324 if (_markStack.overflow()) { 2325 // This should have been done already when we tried to push an 2326 // entry on to the global mark stack. But let's do it again. 2327 set_has_overflown(); 2328 } 2329 2330 assert(rp->num_q() == active_workers, "why not"); 2331 2332 rp->enqueue_discovered_references(executor); 2333 2334 rp->verify_no_references_recorded(); 2335 assert(!rp->discovery_enabled(), "Post condition"); 2336 } 2337 2338 if (has_overflown()) { 2339 // We can not trust g1_is_alive if the marking stack overflowed 2340 return; 2341 } 2342 2343 assert(_markStack.isEmpty(), "Marking should have completed"); 2344 2345 // Unload Klasses, String, Symbols, Code Cache, etc. 2346 { 2347 G1CMTraceTime trace("Unloading", G1Log::finer()); 2348 2349 if (ClassUnloadingWithConcurrentMark) { 2350 bool purged_classes; 2351 2352 { 2353 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2354 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2355 } 2356 2357 { 2358 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2359 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2360 } 2361 } 2362 2363 if (G1StringDedup::is_enabled()) { 2364 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2365 G1StringDedup::unlink(&g1_is_alive); 2366 } 2367 } 2368 } 2369 2370 void ConcurrentMark::swapMarkBitMaps() { 2371 CMBitMapRO* temp = _prevMarkBitMap; 2372 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2373 _nextMarkBitMap = (CMBitMap*) temp; 2374 } 2375 2376 // Closure for marking entries in SATB buffers. 2377 class CMSATBBufferClosure : public SATBBufferClosure { 2378 private: 2379 CMTask* _task; 2380 G1CollectedHeap* _g1h; 2381 2382 // This is very similar to CMTask::deal_with_reference, but with 2383 // more relaxed requirements for the argument, so this must be more 2384 // circumspect about treating the argument as an object. 2385 void do_entry(void* entry) const { 2386 _task->increment_refs_reached(); 2387 HeapRegion* hr = _g1h->heap_region_containing(entry); 2388 if (entry < hr->next_top_at_mark_start()) { 2389 // Until we get here, we don't know whether entry refers to a valid 2390 // object; it could instead have been a stale reference. 
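      // (Entries at or above NTAMS can be skipped safely: anything
      // allocated there since marking started is implicitly live, and a
      // stale entry pointing there might not even be a parsable object,
      // so it must not be touched. Anything below NTAMS is expected to
      // be a valid object, which the assert below double-checks.)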
2391       oop obj = static_cast<oop>(entry);
2392       assert(obj->is_oop(true /* ignore mark word */),
2393              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2394       _task->make_reference_grey(obj, hr);
2395     }
2396   }
2397
2398  public:
2399   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2400     : _task(task), _g1h(g1h) { }
2401
2402   virtual void do_buffer(void** buffer, size_t size) {
2403     for (size_t i = 0; i < size; ++i) {
2404       do_entry(buffer[i]);
2405     }
2406   }
2407 };
2408
2409 class G1RemarkThreadsClosure : public ThreadClosure {
2410   CMSATBBufferClosure _cm_satb_cl;
2411   G1CMOopClosure _cm_cl;
2412   MarkingCodeBlobClosure _code_cl;
2413   int _thread_parity;
2414
2415  public:
2416   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2417     _cm_satb_cl(task, g1h),
2418     _cm_cl(g1h, g1h->concurrent_mark(), task),
2419     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2420     _thread_parity(Threads::thread_claim_parity()) {}
2421
2422   void do_thread(Thread* thread) {
2423     if (thread->is_Java_thread()) {
2424       if (thread->claim_oops_do(true, _thread_parity)) {
2425         JavaThread* jt = (JavaThread*)thread;
2426
2427         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2428         // however, the oops reachable from nmethods have very complex lifecycles:
2429         // * Alive if on the stack of an executing method
2430         // * Weakly reachable otherwise
2431         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2432         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2433         jt->nmethods_do(&_code_cl);
2434
2435         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2436       }
2437     } else if (thread->is_VM_thread()) {
2438       if (thread->claim_oops_do(true, _thread_parity)) {
2439         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2440       }
2441     }
2442   }
2443 };
2444
2445 class CMRemarkTask: public AbstractGangTask {
2446  private:
2447   ConcurrentMark* _cm;
2448  public:
2449   void work(uint worker_id) {
2450     // Since all available tasks are actually started, we should
2451     // only proceed if we're supposed to be active.
2452     if (worker_id < _cm->active_tasks()) {
2453       CMTask* task = _cm->task(worker_id);
2454       task->record_start_time();
2455       {
2456         ResourceMark rm;
2457         HandleMark hm;
2458
2459         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2460         Threads::threads_do(&threads_f);
2461       }
2462
2463       do {
2464         task->do_marking_step(1000000000.0 /* something very large */,
2465                               true         /* do_termination       */,
2466                               false        /* is_serial            */);
2467       } while (task->has_aborted() && !_cm->has_overflown());
2468       // If we overflow, then we do not want to restart. We instead
2469       // want to abort remark and do concurrent marking again.
2470       task->record_end_time();
2471     }
2472   }
2473
2474   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2475     AbstractGangTask("Par Remark"), _cm(cm) {
2476     _cm->terminator()->reset_for_reuse(active_workers);
2477   }
2478 };
2479
2480 void ConcurrentMark::checkpointRootsFinalWork() {
2481   ResourceMark rm;
2482   HandleMark   hm;
2483   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2484
2485   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2486
2487   g1h->ensure_parsability(false);
2488
2489   // This is remark, so we'll use up all active threads.
2490   uint active_workers = g1h->workers()->active_workers();
2491   set_concurrency_and_phase(active_workers, false /* concurrent */);
2492   // Leave _parallel_marking_threads at its
2493   // value originally calculated in the ConcurrentMark
2494   // constructor and pass values of the active workers
2495   // through the gang in the task.
2496
2497   {
2498     StrongRootsScope srs(active_workers);
2499
2500     CMRemarkTask remarkTask(this, active_workers);
2501     // We will start all available threads, even if we decide that the
2502     // active_workers will be fewer. The extra ones will just bail out
2503     // immediately.
2504     g1h->workers()->run_task(&remarkTask);
2505   }
2506
2507   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2508   guarantee(has_overflown() ||
2509             satb_mq_set.completed_buffers_num() == 0,
2510             "Invariant: has_overflown = %s, num buffers = %d",
2511             BOOL_TO_STR(has_overflown()),
2512             satb_mq_set.completed_buffers_num());
2513
2514   print_stats();
2515 }
2516
2517 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2518   // Note we are overriding the read-only view of the prev map here, via
2519   // the cast.
2520   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2521 }
2522
2523 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2524   _nextMarkBitMap->clearRange(mr);
2525 }
2526
2527 HeapRegion*
2528 ConcurrentMark::claim_region(uint worker_id) {
2529   // "checkpoint" the finger
2530   HeapWord* finger = _finger;
2531
2532   // _heap_end will not change underneath our feet; it only changes at
2533   // yield points.
2534   while (finger < _heap_end) {
2535     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2536
2537     // Note on how this code handles humongous regions. In the
2538     // normal case the finger will reach the start of a "starts
2539     // humongous" (SH) region. Its end will either be the end of the
2540     // last "continues humongous" (CH) region in the sequence, or the
2541     // standard end of the SH region (if the SH is the only region in
2542     // the sequence). That way claim_region() will skip over the CH
2543     // regions. However, there is a subtle race between a CM thread
2544     // executing this method and a mutator thread doing a humongous
2545     // object allocation. The two are not mutually exclusive as the CM
2546     // thread does not need to hold the Heap_lock when it gets
2547     // here. So there is a chance that claim_region() will come across
2548     // a free region that's in the process of becoming a SH or a CH
2549     // region. In the former case, it will either
2550     //   a) Miss the update to the region's end, in which case it will
2551     //      visit every subsequent CH region, will find their bitmaps
2552     //      empty, and do nothing, or
2553     //   b) Will observe the update of the region's end (in which case
2554     //      it will skip the subsequent CH regions).
2555     // If it comes across a region that suddenly becomes CH, the
2556     // scenario will be similar to b). So, the race between
2557     // claim_region() and a humongous object allocation might force us
2558     // to do a bit of unnecessary work (due to some unnecessary bitmap
2559     // iterations) but it should not introduce any correctness issues.
2560     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2561
2562     // heap_region_containing() above may return NULL, as we always scan
2563     // and claim regions until the end of the heap. In this case, just jump to the next region.
2564     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2565
2566     // Is the gap between reading the finger and doing the CAS too long?
2567     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2568     if (res == finger && curr_region != NULL) {
2569       // we succeeded
2570       HeapWord* bottom = curr_region->bottom();
2571       HeapWord* limit  = curr_region->next_top_at_mark_start();
2572
2573       if (verbose_low()) {
2574         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2575                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2576                                "limit = " PTR_FORMAT,
2577                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2578       }
2579
2580       // Notice that _finger == end cannot be guaranteed here since
2581       // someone else might have moved the finger even further.
2582       assert(_finger >= end, "the finger should have moved forward");
2583
2584       if (verbose_low()) {
2585         gclog_or_tty->print_cr("[%u] we were successful with region = "
2586                                PTR_FORMAT, worker_id, p2i(curr_region));
2587       }
2588
2589       if (limit > bottom) {
2590         if (verbose_low()) {
2591           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2592                                  "returning it ", worker_id, p2i(curr_region));
2593         }
2594         return curr_region;
2595       } else {
2596         assert(limit == bottom,
2597                "the region limit should be at bottom");
2598         if (verbose_low()) {
2599           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2600                                  "returning NULL", worker_id, p2i(curr_region));
2601         }
2602         // We return NULL and the caller should try calling
2603         // claim_region() again.
2604 return NULL; 2605 } 2606 } else { 2607 assert(_finger > finger, "the finger should have moved forward"); 2608 if (verbose_low()) { 2609 if (curr_region == NULL) { 2610 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, " 2611 "global finger = " PTR_FORMAT ", " 2612 "our finger = " PTR_FORMAT, 2613 worker_id, p2i(_finger), p2i(finger)); 2614 } else { 2615 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2616 "global finger = " PTR_FORMAT ", " 2617 "our finger = " PTR_FORMAT, 2618 worker_id, p2i(_finger), p2i(finger)); 2619 } 2620 } 2621 2622 // read it again 2623 finger = _finger; 2624 } 2625 } 2626 2627 return NULL; 2628 } 2629 2630 #ifndef PRODUCT 2631 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2632 private: 2633 G1CollectedHeap* _g1h; 2634 const char* _phase; 2635 int _info; 2636 2637 public: 2638 VerifyNoCSetOops(const char* phase, int info = -1) : 2639 _g1h(G1CollectedHeap::heap()), 2640 _phase(phase), 2641 _info(info) 2642 { } 2643 2644 void operator()(oop obj) const { 2645 guarantee(obj->is_oop(), 2646 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2647 p2i(obj), _phase, _info); 2648 guarantee(!_g1h->obj_in_cs(obj), 2649 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2650 p2i(obj), _phase, _info); 2651 } 2652 }; 2653 2654 void ConcurrentMark::verify_no_cset_oops() { 2655 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2656 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2657 return; 2658 } 2659 2660 // Verify entries on the global mark stack 2661 _markStack.iterate(VerifyNoCSetOops("Stack")); 2662 2663 // Verify entries on the task queues 2664 for (uint i = 0; i < _max_worker_id; ++i) { 2665 CMTaskQueue* queue = _task_queues->queue(i); 2666 queue->iterate(VerifyNoCSetOops("Queue", i)); 2667 } 2668 2669 // Verify the global finger 2670 HeapWord* global_finger = finger(); 2671 if (global_finger != NULL && global_finger < _heap_end) { 2672 // The global finger always points to a heap region boundary. We 2673 // use heap_region_containing() to get the containing region 2674 // given that the global finger could be pointing to a free region 2675 // which subsequently becomes continues humongous. If that 2676 // happens, heap_region_containing() will return the bottom of the 2677 // corresponding starts humongous region and the check below will 2678 // not hold any more. 2679 // Since we always iterate over all regions, we might get a NULL HeapRegion 2680 // here. 2681 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2682 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2683 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2684 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2685 } 2686 2687 // Verify the task fingers 2688 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2689 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2690 CMTask* task = _tasks[i]; 2691 HeapWord* task_finger = task->finger(); 2692 if (task_finger != NULL && task_finger < _heap_end) { 2693 // See above note on the global finger verification. 2694 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2695 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2696 !task_hr->in_collection_set(), 2697 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2698 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2699 } 2700 } 2701 } 2702 #endif // PRODUCT 2703 2704 // Aggregate the counting data that was constructed concurrently 2705 // with marking. 
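// While marking ran, each worker accumulated liveness into its own card
// bitmap and its own per-region marked-bytes array. The closure below
// folds these per-worker "stripes" into the global card bitmap and into
// each region's marked-bytes total; e.g. for region r the total becomes
// the sum over all workers i of count_marked_bytes_array_for(i)[r].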
2706 class AggregateCountDataHRClosure: public HeapRegionClosure {
2707   G1CollectedHeap* _g1h;
2708   ConcurrentMark* _cm;
2709   CardTableModRefBS* _ct_bs;
2710   BitMap* _cm_card_bm;
2711   uint _max_worker_id;
2712
2713  public:
2714   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2715                               BitMap* cm_card_bm,
2716                               uint max_worker_id) :
2717     _g1h(g1h), _cm(g1h->concurrent_mark()),
2718     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2719     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2720
2721   bool doHeapRegion(HeapRegion* hr) {
2722     HeapWord* start = hr->bottom();
2723     HeapWord* limit = hr->next_top_at_mark_start();
2724     HeapWord* end = hr->end();
2725
2726     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2727            "Preconditions not met - "
2728            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2729            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2730            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2731
2732     assert(hr->next_marked_bytes() == 0, "Precondition");
2733
2734     if (start == limit) {
2735       // NTAMS of this region has not been set so nothing to do.
2736       return false;
2737     }
2738
2739     // 'start' should be in the heap.
2740     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2741     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2742     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2743
2744     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2745     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2746     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2747
2748     // If ntams is not card aligned then we bump the card bitmap index
2749     // for limit so that we get all the cards spanned by
2750     // the object ending at ntams.
2751     // Note: if this is the last region in the heap then ntams
2752     // could actually be just beyond the end of the heap;
2753     // limit_idx will then correspond to a (non-existent) card
2754     // that is also outside the heap.
2755     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2756       limit_idx += 1;
2757     }
2758
2759     assert(limit_idx <= end_idx, "or else use atomics");
2760
2761     // Aggregate the "stripe" in the count data associated with hr.
2762     uint hrm_index = hr->hrm_index();
2763     size_t marked_bytes = 0;
2764
2765     for (uint i = 0; i < _max_worker_id; i += 1) {
2766       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2767       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2768
2769       // Fetch the marked_bytes in this region for task i and
2770       // add it to the running total for this region.
2771       marked_bytes += marked_bytes_array[hrm_index];
2772
2773       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2774       // into the global card bitmap.
2775       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2776
2777       while (scan_idx < limit_idx) {
2778         assert(task_card_bm->at(scan_idx) == true, "should be");
2779         _cm_card_bm->set_bit(scan_idx);
2780         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2781
2782         // BitMap::get_next_one_offset() can handle the case when
2783         // its left_offset parameter is greater than its right_offset
2784         // parameter. It does, however, have an early exit if
2785         // left_offset == right_offset. So let's limit the value
2786         // passed in for left offset here.
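        // (For instance, if scan_idx == limit_idx - 1, the MIN2 below
        // passes limit_idx as the left offset, get_next_one_offset()
        // returns limit_idx immediately, and the while loop terminates.)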
2787 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2788 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2789 } 2790 } 2791 2792 // Update the marked bytes for this region. 2793 hr->add_to_marked_bytes(marked_bytes); 2794 2795 // Next heap region 2796 return false; 2797 } 2798 }; 2799 2800 class G1AggregateCountDataTask: public AbstractGangTask { 2801 protected: 2802 G1CollectedHeap* _g1h; 2803 ConcurrentMark* _cm; 2804 BitMap* _cm_card_bm; 2805 uint _max_worker_id; 2806 uint _active_workers; 2807 HeapRegionClaimer _hrclaimer; 2808 2809 public: 2810 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2811 ConcurrentMark* cm, 2812 BitMap* cm_card_bm, 2813 uint max_worker_id, 2814 uint n_workers) : 2815 AbstractGangTask("Count Aggregation"), 2816 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2817 _max_worker_id(max_worker_id), 2818 _active_workers(n_workers), 2819 _hrclaimer(_active_workers) { 2820 } 2821 2822 void work(uint worker_id) { 2823 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2824 2825 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2826 } 2827 }; 2828 2829 2830 void ConcurrentMark::aggregate_count_data() { 2831 uint n_workers = _g1h->workers()->active_workers(); 2832 2833 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2834 _max_worker_id, n_workers); 2835 2836 _g1h->workers()->run_task(&g1_par_agg_task); 2837 } 2838 2839 // Clear the per-worker arrays used to store the per-region counting data 2840 void ConcurrentMark::clear_all_count_data() { 2841 // Clear the global card bitmap - it will be filled during 2842 // liveness count aggregation (during remark) and the 2843 // final counting task. 2844 _card_bm.clear(); 2845 2846 // Clear the global region bitmap - it will be filled as part 2847 // of the final counting task. 2848 _region_bm.clear(); 2849 2850 uint max_regions = _g1h->max_regions(); 2851 assert(_max_worker_id > 0, "uninitialized"); 2852 2853 for (uint i = 0; i < _max_worker_id; i += 1) { 2854 BitMap* task_card_bm = count_card_bitmap_for(i); 2855 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2856 2857 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2858 assert(marked_bytes_array != NULL, "uninitialized"); 2859 2860 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2861 task_card_bm->clear(); 2862 } 2863 } 2864 2865 void ConcurrentMark::print_stats() { 2866 if (verbose_stats()) { 2867 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2868 for (size_t i = 0; i < _active_tasks; ++i) { 2869 _tasks[i]->print_stats(); 2870 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2871 } 2872 } 2873 } 2874 2875 // abandon current marking iteration due to a Full GC 2876 void ConcurrentMark::abort() { 2877 if (!cmThread()->during_cycle() || _has_aborted) { 2878 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2879 return; 2880 } 2881 2882 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2883 // concurrent bitmap clearing. 2884 _nextMarkBitMap->clearAll(); 2885 2886 // Note we cannot clear the previous marking bitmap here 2887 // since VerifyDuringGC verifies the objects marked during 2888 // a full GC against the previous bitmap. 
2889 2890 // Clear the liveness counting data 2891 clear_all_count_data(); 2892 // Empty mark stack 2893 reset_marking_state(); 2894 for (uint i = 0; i < _max_worker_id; ++i) { 2895 _tasks[i]->clear_region_fields(); 2896 } 2897 _first_overflow_barrier_sync.abort(); 2898 _second_overflow_barrier_sync.abort(); 2899 _has_aborted = true; 2900 2901 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2902 satb_mq_set.abandon_partial_marking(); 2903 // This can be called either during or outside marking, we'll read 2904 // the expected_active value from the SATB queue set. 2905 satb_mq_set.set_active_all_threads( 2906 false, /* new active value */ 2907 satb_mq_set.is_active() /* expected_active */); 2908 2909 _g1h->trace_heap_after_concurrent_cycle(); 2910 _g1h->register_concurrent_cycle_end(); 2911 } 2912 2913 static void print_ms_time_info(const char* prefix, const char* name, 2914 NumberSeq& ns) { 2915 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2916 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2917 if (ns.num() > 0) { 2918 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2919 prefix, ns.sd(), ns.maximum()); 2920 } 2921 } 2922 2923 void ConcurrentMark::print_summary_info() { 2924 gclog_or_tty->print_cr(" Concurrent marking:"); 2925 print_ms_time_info(" ", "init marks", _init_times); 2926 print_ms_time_info(" ", "remarks", _remark_times); 2927 { 2928 print_ms_time_info(" ", "final marks", _remark_mark_times); 2929 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2930 2931 } 2932 print_ms_time_info(" ", "cleanups", _cleanup_times); 2933 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2934 _total_counting_time, 2935 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 2936 (double)_cleanup_times.num() 2937 : 0.0)); 2938 if (G1ScrubRemSets) { 2939 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2940 _total_rs_scrub_time, 2941 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 2942 (double)_cleanup_times.num() 2943 : 0.0)); 2944 } 2945 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 2946 (_init_times.sum() + _remark_times.sum() + 2947 _cleanup_times.sum())/1000.0); 2948 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 2949 "(%8.2f s marking).", 2950 cmThread()->vtime_accum(), 2951 cmThread()->vtime_mark_accum()); 2952 } 2953 2954 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2955 _parallel_workers->print_worker_threads_on(st); 2956 } 2957 2958 void ConcurrentMark::print_on_error(outputStream* st) const { 2959 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2960 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2961 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2962 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2963 } 2964 2965 // We take a break if someone is trying to stop the world. 
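// (Each marking task polls SuspendibleThreadSet::should_yield() from its
// regular clock call; when a safepoint is pending, the thread parks in
// SuspendibleThreadSet::yield() until the world is restarted.)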
2966 bool ConcurrentMark::do_yield_check(uint worker_id) { 2967 if (SuspendibleThreadSet::should_yield()) { 2968 if (worker_id == 0) { 2969 _g1h->g1_policy()->record_concurrent_pause(); 2970 } 2971 SuspendibleThreadSet::yield(); 2972 return true; 2973 } else { 2974 return false; 2975 } 2976 } 2977 2978 #ifndef PRODUCT 2979 // for debugging purposes 2980 void ConcurrentMark::print_finger() { 2981 gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT, 2982 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 2983 for (uint i = 0; i < _max_worker_id; ++i) { 2984 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 2985 } 2986 gclog_or_tty->cr(); 2987 } 2988 #endif 2989 2990 // Closure for iteration over bitmaps 2991 class CMBitMapClosure : public BitMapClosure { 2992 private: 2993 // the bitmap that is being iterated over 2994 CMBitMap* _nextMarkBitMap; 2995 ConcurrentMark* _cm; 2996 CMTask* _task; 2997 2998 public: 2999 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3000 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3001 3002 bool do_bit(size_t offset) { 3003 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3004 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3005 assert( addr < _cm->finger(), "invariant"); 3006 3007 statsOnly( _task->increase_objs_found_on_bitmap() ); 3008 assert(addr >= _task->finger(), "invariant"); 3009 3010 // We move that task's local finger along. 3011 _task->move_finger_to(addr); 3012 3013 _task->scan_object(oop(addr)); 3014 // we only partially drain the local queue and global stack 3015 _task->drain_local_queue(true); 3016 _task->drain_global_stack(true); 3017 3018 // if the has_aborted flag has been raised, we need to bail out of 3019 // the iteration 3020 return !_task->has_aborted(); 3021 } 3022 }; 3023 3024 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 3025 ReferenceProcessor* result = NULL; 3026 if (G1UseConcMarkReferenceProcessing) { 3027 result = g1h->ref_processor_cm(); 3028 assert(result != NULL, "should not be NULL"); 3029 } 3030 return result; 3031 } 3032 3033 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3034 ConcurrentMark* cm, 3035 CMTask* task) 3036 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 3037 _g1h(g1h), _cm(cm), _task(task) 3038 { } 3039 3040 void CMTask::setup_for_region(HeapRegion* hr) { 3041 assert(hr != NULL, 3042 "claim_region() should have filtered out NULL regions"); 3043 3044 if (_cm->verbose_low()) { 3045 gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT, 3046 _worker_id, p2i(hr)); 3047 } 3048 3049 _curr_region = hr; 3050 _finger = hr->bottom(); 3051 update_region_limit(); 3052 } 3053 3054 void CMTask::update_region_limit() { 3055 HeapRegion* hr = _curr_region; 3056 HeapWord* bottom = hr->bottom(); 3057 HeapWord* limit = hr->next_top_at_mark_start(); 3058 3059 if (limit == bottom) { 3060 if (_cm->verbose_low()) { 3061 gclog_or_tty->print_cr("[%u] found an empty region " 3062 "[" PTR_FORMAT ", " PTR_FORMAT ")", 3063 _worker_id, p2i(bottom), p2i(limit)); 3064 } 3065 // The region was collected underneath our feet. 3066 // We set the finger to bottom to ensure that the bitmap 3067 // iteration that will follow this will not do anything. 
3068 // (this is not a condition that holds when we set the region up, 3069 // as the region is not supposed to be empty in the first place) 3070 _finger = bottom; 3071 } else if (limit >= _region_limit) { 3072 assert(limit >= _finger, "peace of mind"); 3073 } else { 3074 assert(limit < _region_limit, "only way to get here"); 3075 // This can happen under some pretty unusual circumstances. An 3076 // evacuation pause empties the region underneath our feet (NTAMS 3077 // at bottom). We then do some allocation in the region (NTAMS 3078 // stays at bottom), followed by the region being used as a GC 3079 // alloc region (NTAMS will move to top() and the objects 3080 // originally below it will be grayed). All objects now marked in 3081 // the region are explicitly grayed, if below the global finger, 3082 // and we do not need in fact to scan anything else. So, we simply 3083 // set _finger to be limit to ensure that the bitmap iteration 3084 // doesn't do anything. 3085 _finger = limit; 3086 } 3087 3088 _region_limit = limit; 3089 } 3090 3091 void CMTask::giveup_current_region() { 3092 assert(_curr_region != NULL, "invariant"); 3093 if (_cm->verbose_low()) { 3094 gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT, 3095 _worker_id, p2i(_curr_region)); 3096 } 3097 clear_region_fields(); 3098 } 3099 3100 void CMTask::clear_region_fields() { 3101 // Values for these three fields that indicate that we're not 3102 // holding on to a region. 3103 _curr_region = NULL; 3104 _finger = NULL; 3105 _region_limit = NULL; 3106 } 3107 3108 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3109 if (cm_oop_closure == NULL) { 3110 assert(_cm_oop_closure != NULL, "invariant"); 3111 } else { 3112 assert(_cm_oop_closure == NULL, "invariant"); 3113 } 3114 _cm_oop_closure = cm_oop_closure; 3115 } 3116 3117 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3118 guarantee(nextMarkBitMap != NULL, "invariant"); 3119 3120 if (_cm->verbose_low()) { 3121 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3122 } 3123 3124 _nextMarkBitMap = nextMarkBitMap; 3125 clear_region_fields(); 3126 3127 _calls = 0; 3128 _elapsed_time_ms = 0.0; 3129 _termination_time_ms = 0.0; 3130 _termination_start_time_ms = 0.0; 3131 3132 #if _MARKING_STATS_ 3133 _aborted = 0; 3134 _aborted_overflow = 0; 3135 _aborted_cm_aborted = 0; 3136 _aborted_yield = 0; 3137 _aborted_timed_out = 0; 3138 _aborted_satb = 0; 3139 _aborted_termination = 0; 3140 _steal_attempts = 0; 3141 _steals = 0; 3142 _local_pushes = 0; 3143 _local_pops = 0; 3144 _local_max_size = 0; 3145 _objs_scanned = 0; 3146 _global_pushes = 0; 3147 _global_pops = 0; 3148 _global_max_size = 0; 3149 _global_transfers_to = 0; 3150 _global_transfers_from = 0; 3151 _regions_claimed = 0; 3152 _objs_found_on_bitmap = 0; 3153 _satb_buffers_processed = 0; 3154 #endif // _MARKING_STATS_ 3155 } 3156 3157 bool CMTask::should_exit_termination() { 3158 regular_clock_call(); 3159 // This is called when we are in the termination protocol. We should 3160 // quit if, for some reason, this task wants to abort or the global 3161 // stack is not empty (this means that we can get work from it). 
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following:

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
                           "scanned = " SIZE_FORMAT "%s, refs reached = " SIZE_FORMAT "%s",
                           _worker_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
                             _worker_id);
    }
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
                               _worker_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
                               _worker_id, n);
      }
      statsOnly( size_t tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
                             _worker_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( size_t tmp_size = (size_t)_task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
                             _worker_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
                               p2i((void*) obj));
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
                             _worker_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
                             _worker_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
                             _worker_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue set makes several assumptions about whether to call
// the par or non-par versions of the methods. This is why some of the
// code is replicated. We should really get rid of the single-threaded
// version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
    }
    statsOnly( ++_satb_buffers_processed );
    regular_clock_call();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
                         _worker_id, _calls);
  gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr(" Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr(" Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr(" Local Queue: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr(" Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr(" transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr(" Regions: claimed = " SIZE_FORMAT, _regions_claimed);
  gclog_or_tty->print_cr(" SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
  gclog_or_tty->print_cr(" Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr(" Aborted: " SIZE_FORMAT ", due to", _aborted);
  gclog_or_tty->print_cr(" overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr(" time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
  return _task_queues->steal(worker_id, hash_seed, obj);
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it needs to be also
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work will a task totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and finer-grained
      interaction with it might cause contention. If it
      overflows, then the marking phase should restart and iterate
      over the bitmap to identify gray objects. Throughout the marking
      phase, tasks attempt to keep the global mark stack at a small
      length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work will
      tasks totally drain the global mark stack.

      (4) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
      (local queues, stacks, fingers etc.)  are re-initialized so that
      when do_marking_step() completes, the marking phase can
      immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield then we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reaches a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.
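
    To make the calling pattern concrete, here is a rough sketch of
    the retry loop a caller is expected to implement (illustrative
    only; the names "task" and "cm" are stand-ins, and the real
    callers wrap this loop with their own yielding, termination and
    overflow handling):

      do {
        task->do_marking_step(target_ms,
                              true  /* do_termination */,
                              false /* is_serial */);
        if (task->has_aborted()) {
          // yield and/or sync up with the other tasks, then retry
        }
      } while (task->has_aborted() && !cm->has_aborted());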

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _worker_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet.
      // Let's read its limit again to make sure that we do not
      // iterate over a region of the heap that contains garbage
      // (update_region_limit() will also move _finger to the start
      // of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] we're scanning part "
                               "[" PTR_FORMAT ", " PTR_FORMAT ") "
                               "of region " HR_FORMAT,
                               _worker_id, p2i(_finger), p2i(_region_limit),
                               HR_FORMAT_PARAMS(_curr_region));
      }

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%u] we successfully claimed "
                                 "region " PTR_FORMAT,
                                 _worker_id, p2i(claimed_region));
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
                                 _worker_id, p2i((void*) obj));
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task; it
      // will be restarted and we can hopefully find more things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] apparently there is more work to do",
                               _worker_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
      }

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
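      //
      // In sketch form, the restart protocol implemented above is
      // (illustrative summary of the code above, not extra work):
      //
      //   enter_first_sync_barrier(id);   // all tasks stop marking
      //   clear_region_fields();          // reset this task's local state
      //   enter_second_sync_barrier(id);  // global structures have been reset
      //
      // after which every task returns from do_marking_step() with
      // has_aborted() set and the caller's retry loop re-invokes it.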
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
                               _worker_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX             "###"

#define G1PPRL_ADDR_BASE_FORMAT        " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT             " %-4s"
#define G1PPRL_TYPE_H_FORMAT           " %4s"
#define G1PPRL_BYTE_FORMAT             " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT           " %9s"
#define G1PPRL_DOUBLE_FORMAT           " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT         " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
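  // As a rough illustration, the output assembled here looks like
  // this (the phase name and all values are hypothetical):
  //
  //   ### PHASE Remark @ 1.234
  //   ### HEAP reserved: 0x...-0x... region-size: 1048576
  //   ###
  //   ### type address-range used prev-live next-live gc-eff remset code-roots
  //
  // followed by one "###" line per region (see doHeapRegion() below)
  // and a SUMMARY footer printed by the destructor.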
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff",
                 "remset", "code-roots");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                 "(bytes)", "(bytes)");
}

// It takes as a parameter a pointer to one of the _hum_* fields; it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, p2i(bottom), p2i(end),
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                 remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // add static memory usages to remembered set sizes
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}