/*
 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
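// The helpers used below translate between heap addresses and bit indices.
// A minimal sketch of the mapping, assuming the usual base-and-shift scheme
// (see the actual inline definitions in concurrentMark.hpp):
//
//   size_t    bit = heapWordToOffset(addr);   // heap word  -> bit index
//   HeapWord* a   = offsetToHeapWord(bit);    // bit index  -> heap word
//
// One bit covers 2^_shifter heap words; the ConcurrentMark constructor
// passes MinObjAlignment - 1 as the shifter.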
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
  }
}
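// A minimal sketch of the lock-free push protocol used by par_push() and
// par_adjoin_arr() below (illustration only): a thread claims one or more
// slots by advancing _index with a CAS, and only writes the entries once
// the CAS has succeeded; if the CAS fails it simply retries.
//
//   jint index = _index;
//   if (Atomic::cmpxchg(index + 1, &_index, index) == index) {
//     _base[index] = ptr;    // slot is now owned by this thread
//   }                        // otherwise: lost the race, try again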
void
CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false),  _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}
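// Illustrative use of the claiming protocol above, as it appears later in
// CMRootRegionScanTask::work(): each marking worker keeps claiming and
// scanning survivor regions until claim_next() hands out NULL.
//
//   HeapRegion* hr = root_regions->claim_next();
//   while (hr != NULL) {
//     _cm->scanRootRegion(hr, worker_id);
//     hr = root_regions->claim_next();
//   }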
void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set
      _parallel_marking_threads = (uint) ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                              (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (uint) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
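      // Worked example (hypothetical flag values, for illustration only):
      // with G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
      // GCPauseIntervalMillis=1000 and 8 processors:
      //   overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
      //   cpu_ratio             = 1 / 8               = 0.125
      //   marking_thread_num    = ceil(0.02 / 0.125)  = 1
      //   marking_task_overhead = 0.02 / 1 * 8        = 0.16
      //   sleep_factor          = (1 - 0.16) / 0.16   = 5.25
      // i.e. one marking thread that sleeps roughly 5.25x as long as it runs.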
    } else {
      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW phase.
  // CM will be notified of any future g1_committed expansions at the end
  // of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible to be suspended for a Full GC or an evacuation pause
 * could occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
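        // Throttling note (illustration only): the sleep below is sized as
        // elapsed_vtime_sec * sleep_factor. With a marking_task_overhead of
        // 0.25, sleep_factor is (1 - 0.25) / 0.25 = 3.0, so a marking step
        // that consumed 10ms of CPU time is followed by a ~30ms sleep,
        // keeping this worker's CPU share near 25%.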
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (parallel_marking_threads() > 0) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  ConcurrentMark* _cm;
  BitMap* _region_bm;
  BitMap* _card_bm;

  void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
    assert(start_idx <= last_idx, "sanity");

    // Set the inclusive bit range [start_idx, last_idx].
    // For small ranges (up to 8 cards) use a simple loop; otherwise
    // use par_at_put_range.
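    // For example (illustration only): for an object spanning card indices
    // [start_idx, last_idx] = [10, 12], the loop branch sets bits 10, 11
    // and 12 one at a time; par_at_put_range(10, 13, true) sets the same
    // bits, since its end index is exclusive (hence the +1 below).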
    if ((last_idx - start_idx) < 8) {
      for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
        _card_bm->par_set_bit(i);
      }
    } else {
      assert(last_idx < _card_bm->size(), "sanity");
      // Note BitMap::par_at_put_range() is exclusive.
      _card_bm->par_at_put_range(start_idx, last_idx+1, true);
    }
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm):
    _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(cm, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->bottom();

    assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, nextTop, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);

    size_t marked_bytes = 0;

    while (start < nextTop) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_last = start + obj_sz - 1;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);

      // Set the bits in the card BM for this object (inclusive).
      set_card_bitmap_range(start_idx, last_idx);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (nextTop < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
      BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);

      set_card_bitmap_range(start_idx, last_idx);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _cm(cm),
    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_cm,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(ConcurrentMark* cm,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(cm, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Now set the bits for [ntams, top]
    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
    BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
    set_card_bitmap_range(start_idx, last_idx);

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_cm,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  uint _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  OldRegionSet* _old_proxy_set;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_proxy_set(old_proxy_set),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->continuesHumongous()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
1651 _g1->reset_gc_time_stamps(hr); 1652 double start = os::elapsedTime(); 1653 _regions_claimed++; 1654 hr->note_end_of_marking(); 1655 _max_live_bytes += hr->max_live_bytes(); 1656 _g1->free_region_if_empty(hr, 1657 &_freed_bytes, 1658 _local_cleanup_list, 1659 _old_proxy_set, 1660 _humongous_proxy_set, 1661 _hrrs_cleanup_task, 1662 true /* par */); 1663 double region_time = (os::elapsedTime() - start); 1664 _claimed_region_time += region_time; 1665 if (region_time > _max_region_time) { 1666 _max_region_time = region_time; 1667 } 1668 return false; 1669 } 1670 1671 size_t max_live_bytes() { return _max_live_bytes; } 1672 uint regions_claimed() { return _regions_claimed; } 1673 double claimed_region_time_sec() { return _claimed_region_time; } 1674 double max_region_time_sec() { return _max_region_time; } 1675 }; 1676 1677 class G1ParNoteEndTask: public AbstractGangTask { 1678 friend class G1NoteEndOfConcMarkClosure; 1679 1680 protected: 1681 G1CollectedHeap* _g1h; 1682 size_t _max_live_bytes; 1683 size_t _freed_bytes; 1684 FreeRegionList* _cleanup_list; 1685 1686 public: 1687 G1ParNoteEndTask(G1CollectedHeap* g1h, 1688 FreeRegionList* cleanup_list) : 1689 AbstractGangTask("G1 note end"), _g1h(g1h), 1690 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1691 1692 void work(uint worker_id) { 1693 double start = os::elapsedTime(); 1694 FreeRegionList local_cleanup_list("Local Cleanup List"); 1695 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1696 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1697 HRRSCleanupTask hrrs_cleanup_task; 1698 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1699 &old_proxy_set, 1700 &humongous_proxy_set, 1701 &hrrs_cleanup_task); 1702 if (G1CollectedHeap::use_parallel_gc_threads()) { 1703 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1704 _g1h->workers()->active_workers(), 1705 HeapRegion::NoteEndClaimValue); 1706 } else { 1707 _g1h->heap_region_iterate(&g1_note_end); 1708 } 1709 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1710 1711 // Now update the lists 1712 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1713 NULL /* free_list */, 1714 &old_proxy_set, 1715 &humongous_proxy_set, 1716 true /* par */); 1717 { 1718 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1719 _max_live_bytes += g1_note_end.max_live_bytes(); 1720 _freed_bytes += g1_note_end.freed_bytes(); 1721 1722 // If we iterate over the global cleanup list at the end of 1723 // cleanup to do this printing we will not guarantee to only 1724 // generate output for the newly-reclaimed regions (the list 1725 // might not be empty at the beginning of cleanup; we might 1726 // still be working on its previous contents). So we do the 1727 // printing here, before we append the new regions to the global 1728 // cleanup list. 
1729 1730 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1731 if (hr_printer->is_active()) { 1732 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1733 while (iter.more_available()) { 1734 HeapRegion* hr = iter.get_next(); 1735 hr_printer->cleanup(hr); 1736 } 1737 } 1738 1739 _cleanup_list->add_as_tail(&local_cleanup_list); 1740 assert(local_cleanup_list.is_empty(), "post-condition"); 1741 1742 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1743 } 1744 } 1745 size_t max_live_bytes() { return _max_live_bytes; } 1746 size_t freed_bytes() { return _freed_bytes; } 1747 }; 1748 1749 class G1ParScrubRemSetTask: public AbstractGangTask { 1750 protected: 1751 G1RemSet* _g1rs; 1752 BitMap* _region_bm; 1753 BitMap* _card_bm; 1754 public: 1755 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1756 BitMap* region_bm, BitMap* card_bm) : 1757 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1758 _region_bm(region_bm), _card_bm(card_bm) { } 1759 1760 void work(uint worker_id) { 1761 if (G1CollectedHeap::use_parallel_gc_threads()) { 1762 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1763 HeapRegion::ScrubRemSetClaimValue); 1764 } else { 1765 _g1rs->scrub(_region_bm, _card_bm); 1766 } 1767 } 1768 1769 }; 1770 1771 void ConcurrentMark::cleanup() { 1772 // world is stopped at this checkpoint 1773 assert(SafepointSynchronize::is_at_safepoint(), 1774 "world should be stopped"); 1775 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1776 1777 // If a full collection has happened, we shouldn't do this. 1778 if (has_aborted()) { 1779 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1780 return; 1781 } 1782 1783 HRSPhaseSetter x(HRSPhaseCleanup); 1784 g1h->verify_region_sets_optional(); 1785 1786 if (VerifyDuringGC) { 1787 HandleMark hm; // handle scope 1788 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1789 Universe::heap()->prepare_for_verify(); 1790 Universe::verify(/* silent */ false, 1791 /* option */ VerifyOption_G1UsePrevMarking); 1792 } 1793 1794 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1795 g1p->record_concurrent_mark_cleanup_start(); 1796 1797 double start = os::elapsedTime(); 1798 1799 HeapRegionRemSet::reset_for_cleanup_tasks(); 1800 1801 uint n_workers; 1802 1803 // Do counting once more with the world stopped for good measure. 1804 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1805 1806 if (G1CollectedHeap::use_parallel_gc_threads()) { 1807 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1808 "sanity check"); 1809 1810 g1h->set_par_threads(); 1811 n_workers = g1h->n_par_threads(); 1812 assert(g1h->n_par_threads() == n_workers, 1813 "Should not have been reset"); 1814 g1h->workers()->run_task(&g1_par_count_task); 1815 // Done with the parallel phase so reset to 0. 1816 g1h->set_par_threads(0); 1817 1818 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1819 "sanity check"); 1820 } else { 1821 n_workers = 1; 1822 g1_par_count_task.work(0); 1823 } 1824 1825 if (VerifyDuringGC) { 1826 // Verify that the counting data accumulated during marking matches 1827 // that calculated by walking the marking bitmap. 
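    // The approach: recompute the region and card liveness bitmaps from
    // scratch into the "expected" bitmaps below, then let
    // G1ParVerifyFinalCountTask compare them against the "actual" bitmaps
    // filled in by the final counting task; any discrepancy counts as a
    // failure and trips the guarantee() at the end of this block.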
1828 1829 // Bitmaps to hold expected values 1830 BitMap expected_region_bm(_region_bm.size(), false); 1831 BitMap expected_card_bm(_card_bm.size(), false); 1832 1833 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1834 &_region_bm, 1835 &_card_bm, 1836 &expected_region_bm, 1837 &expected_card_bm); 1838 1839 if (G1CollectedHeap::use_parallel_gc_threads()) { 1840 g1h->set_par_threads((int)n_workers); 1841 g1h->workers()->run_task(&g1_par_verify_task); 1842 // Done with the parallel phase so reset to 0. 1843 g1h->set_par_threads(0); 1844 1845 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 1846 "sanity check"); 1847 } else { 1848 g1_par_verify_task.work(0); 1849 } 1850 1851 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1852 } 1853 1854 size_t start_used_bytes = g1h->used(); 1855 g1h->set_marking_complete(); 1856 1857 double count_end = os::elapsedTime(); 1858 double this_final_counting_time = (count_end - start); 1859 _total_counting_time += this_final_counting_time; 1860 1861 if (G1PrintRegionLivenessInfo) { 1862 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1863 _g1h->heap_region_iterate(&cl); 1864 } 1865 1866 // Install newly created mark bitMap as "prev". 1867 swapMarkBitMaps(); 1868 1869 g1h->reset_gc_time_stamp(); 1870 1871 // Note end of marking in all heap regions. 1872 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 1873 if (G1CollectedHeap::use_parallel_gc_threads()) { 1874 g1h->set_par_threads((int)n_workers); 1875 g1h->workers()->run_task(&g1_par_note_end_task); 1876 g1h->set_par_threads(0); 1877 1878 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 1879 "sanity check"); 1880 } else { 1881 g1_par_note_end_task.work(0); 1882 } 1883 g1h->check_gc_time_stamps(); 1884 1885 if (!cleanup_list_is_empty()) { 1886 // The cleanup list is not empty, so we'll have to process it 1887 // concurrently. Notify anyone else that might be wanting free 1888 // regions that there will be more free regions coming soon. 1889 g1h->set_free_regions_coming(); 1890 } 1891 1892 // call below, since it affects the metric by which we sort the heap 1893 // regions. 1894 if (G1ScrubRemSets) { 1895 double rs_scrub_start = os::elapsedTime(); 1896 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 1897 if (G1CollectedHeap::use_parallel_gc_threads()) { 1898 g1h->set_par_threads((int)n_workers); 1899 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1900 g1h->set_par_threads(0); 1901 1902 assert(g1h->check_heap_region_claim_values( 1903 HeapRegion::ScrubRemSetClaimValue), 1904 "sanity check"); 1905 } else { 1906 g1_par_scrub_rs_task.work(0); 1907 } 1908 1909 double rs_scrub_end = os::elapsedTime(); 1910 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1911 _total_rs_scrub_time += this_rs_scrub_time; 1912 } 1913 1914 // this will also free any regions totally full of garbage objects, 1915 // and sort the regions. 1916 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 1917 1918 // Statistics. 1919 double end = os::elapsedTime(); 1920 _cleanup_times.add((end - start) * 1000.0); 1921 1922 if (G1Log::fine()) { 1923 g1h->print_size_transition(gclog_or_tty, 1924 start_used_bytes, 1925 g1h->used(), 1926 g1h->capacity()); 1927 } 1928 1929 // Clean up will have freed any regions completely full of garbage. 1930 // Update the soft reference policy with the new heap occupancy. 
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  g1h->verify_region_sets_optional();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks.
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
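    // In other words: flush tmp_free_list to the secondary_free_list every
    // G1SecondaryFreeListAppendLength regions (and once more for whatever
    // is left once the cleanup list runs dry), holding
    // SecondaryFreeList_lock only for the duration of each append.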
1980 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1981 _cleanup_list.is_empty()) { 1982 if (G1ConcRegionFreeingVerbose) { 1983 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1984 "appending %u entries to the secondary_free_list, " 1985 "cleanup list still has %u entries", 1986 tmp_free_list.length(), 1987 _cleanup_list.length()); 1988 } 1989 1990 { 1991 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1992 g1h->secondary_free_list_add_as_tail(&tmp_free_list); 1993 SecondaryFreeList_lock->notify_all(); 1994 } 1995 1996 if (G1StressConcRegionFreeing) { 1997 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1998 os::sleep(Thread::current(), (jlong) 1, false); 1999 } 2000 } 2001 } 2002 } 2003 assert(tmp_free_list.is_empty(), "post-condition"); 2004 } 2005 2006 // Support closures for reference procssing in G1 2007 2008 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2009 HeapWord* addr = (HeapWord*)obj; 2010 return addr != NULL && 2011 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2012 } 2013 2014 class G1CMKeepAliveClosure: public OopClosure { 2015 G1CollectedHeap* _g1; 2016 ConcurrentMark* _cm; 2017 public: 2018 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) : 2019 _g1(g1), _cm(cm) { 2020 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id"); 2021 } 2022 2023 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2024 virtual void do_oop( oop* p) { do_oop_work(p); } 2025 2026 template <class T> void do_oop_work(T* p) { 2027 oop obj = oopDesc::load_decode_heap_oop(p); 2028 HeapWord* addr = (HeapWord*)obj; 2029 2030 if (_cm->verbose_high()) { 2031 gclog_or_tty->print_cr("\t[0] we're looking at location " 2032 "*"PTR_FORMAT" = "PTR_FORMAT, 2033 p, (void*) obj); 2034 } 2035 2036 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { 2037 _cm->mark_and_count(obj); 2038 _cm->mark_stack_push(obj); 2039 } 2040 } 2041 }; 2042 2043 class G1CMDrainMarkingStackClosure: public VoidClosure { 2044 ConcurrentMark* _cm; 2045 CMMarkStack* _markStack; 2046 G1CMKeepAliveClosure* _oopClosure; 2047 public: 2048 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack, 2049 G1CMKeepAliveClosure* oopClosure) : 2050 _cm(cm), 2051 _markStack(markStack), 2052 _oopClosure(oopClosure) { } 2053 2054 void do_void() { 2055 _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false); 2056 } 2057 }; 2058 2059 // 'Keep Alive' closure used by parallel reference processing. 2060 // An instance of this closure is used in the parallel reference processing 2061 // code rather than an instance of G1CMKeepAliveClosure. We could have used 2062 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are 2063 // placed on to discovered ref lists once so we can mark and push with no 2064 // need to check whether the object has already been marked. Using the 2065 // G1CMKeepAliveClosure would mean, however, having all the worker threads 2066 // operating on the global mark stack. This means that an individual 2067 // worker would be doing lock-free pushes while it processes its own 2068 // discovered ref list followed by drain call. If the discovered ref lists 2069 // are unbalanced then this could cause interference with the other 2070 // workers. Using a CMTask (and its embedded local data structures) 2071 // avoids that potential interference. 
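// In rough pseudocode, a reference processing worker using the two
// closures below ends up doing something like this (a sketch of the
// control flow, not a literal transcript of ReferenceProcessor
// internals):
//
//   for each ref in my discovered list:
//     keep_alive.do_oop(ref);          // -> CMTask::deal_with_reference()
//     every G1RefProcDrainInterval refs:
//       do_marking_step(...);          // drain what we just pushed
//   drain.do_void();                   // final, fully-draining step with
//                                      // stealing and termination enabled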
2072 class G1CMParKeepAliveAndDrainClosure: public OopClosure { 2073 ConcurrentMark* _cm; 2074 CMTask* _task; 2075 int _ref_counter_limit; 2076 int _ref_counter; 2077 public: 2078 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) : 2079 _cm(cm), _task(task), 2080 _ref_counter_limit(G1RefProcDrainInterval) { 2081 assert(_ref_counter_limit > 0, "sanity"); 2082 _ref_counter = _ref_counter_limit; 2083 } 2084 2085 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2086 virtual void do_oop( oop* p) { do_oop_work(p); } 2087 2088 template <class T> void do_oop_work(T* p) { 2089 if (!_cm->has_overflown()) { 2090 oop obj = oopDesc::load_decode_heap_oop(p); 2091 if (_cm->verbose_high()) { 2092 gclog_or_tty->print_cr("\t[%d] we're looking at location " 2093 "*"PTR_FORMAT" = "PTR_FORMAT, 2094 _task->task_id(), p, (void*) obj); 2095 } 2096 2097 _task->deal_with_reference(obj); 2098 _ref_counter--; 2099 2100 if (_ref_counter == 0) { 2101 // We have dealt with _ref_counter_limit references, pushing them and objects 2102 // reachable from them on to the local stack (and possibly the global stack). 2103 // Call do_marking_step() to process these entries. We call the routine in a 2104 // loop, which we'll exit if there's nothing more to do (i.e. we're done 2105 // with the entries that we've pushed as a result of the deal_with_reference 2106 // calls above) or we overflow. 2107 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 2108 // while there may still be some work to do. (See the comment at the 2109 // beginning of CMTask::do_marking_step() for those conditions - one of which 2110 // is reaching the specified time target.) It is only when 2111 // CMTask::do_marking_step() returns without setting the has_aborted() flag 2112 // that the marking has completed. 2113 do { 2114 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2115 _task->do_marking_step(mark_step_duration_ms, 2116 false /* do_stealing */, 2117 false /* do_termination */); 2118 } while (_task->has_aborted() && !_cm->has_overflown()); 2119 _ref_counter = _ref_counter_limit; 2120 } 2121 } else { 2122 if (_cm->verbose_high()) { 2123 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id()); 2124 } 2125 } 2126 } 2127 }; 2128 2129 class G1CMParDrainMarkingStackClosure: public VoidClosure { 2130 ConcurrentMark* _cm; 2131 CMTask* _task; 2132 public: 2133 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) : 2134 _cm(cm), _task(task) { } 2135 2136 void do_void() { 2137 do { 2138 if (_cm->verbose_high()) { 2139 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", 2140 _task->task_id()); 2141 } 2142 2143 // We call CMTask::do_marking_step() to completely drain the local and 2144 // global marking stacks. The routine is called in a loop, which we'll 2145 // exit if there's nothing more to do (i.e. we'completely drained the 2146 // entries that were pushed as a result of applying the 2147 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref 2148 // lists above) or we overflow the global marking stack. 2149 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 2150 // while there may still be some work to do. (See the comment at the 2151 // beginning of CMTask::do_marking_step() for those conditions - one of which 2152 // is reaching the specified time target.) It is only when 2153 // CMTask::do_marking_step() returns without setting the has_aborted() flag 2154 // that the marking has completed. 
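      // Passing a very large time target below effectively disables the
      // time-based abort in do_marking_step(); during reference processing
      // we only expect this loop to exit early if the global mark stack
      // overflows.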
2155 2156 _task->do_marking_step(1000000000.0 /* something very large */, 2157 true /* do_stealing */, 2158 true /* do_termination */); 2159 } while (_task->has_aborted() && !_cm->has_overflown()); 2160 } 2161 }; 2162 2163 // Implementation of AbstractRefProcTaskExecutor for parallel 2164 // reference processing at the end of G1 concurrent marking 2165 2166 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2167 private: 2168 G1CollectedHeap* _g1h; 2169 ConcurrentMark* _cm; 2170 WorkGang* _workers; 2171 int _active_workers; 2172 2173 public: 2174 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2175 ConcurrentMark* cm, 2176 WorkGang* workers, 2177 int n_workers) : 2178 _g1h(g1h), _cm(cm), 2179 _workers(workers), _active_workers(n_workers) { } 2180 2181 // Executes the given task using concurrent marking worker threads. 2182 virtual void execute(ProcessTask& task); 2183 virtual void execute(EnqueueTask& task); 2184 }; 2185 2186 class G1CMRefProcTaskProxy: public AbstractGangTask { 2187 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2188 ProcessTask& _proc_task; 2189 G1CollectedHeap* _g1h; 2190 ConcurrentMark* _cm; 2191 2192 public: 2193 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2194 G1CollectedHeap* g1h, 2195 ConcurrentMark* cm) : 2196 AbstractGangTask("Process reference objects in parallel"), 2197 _proc_task(proc_task), _g1h(g1h), _cm(cm) { } 2198 2199 virtual void work(uint worker_id) { 2200 CMTask* marking_task = _cm->task(worker_id); 2201 G1CMIsAliveClosure g1_is_alive(_g1h); 2202 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task); 2203 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2204 2205 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2206 } 2207 }; 2208 2209 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2210 assert(_workers != NULL, "Need parallel worker threads."); 2211 2212 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2213 2214 // We need to reset the phase for each task execution so that 2215 // the termination protocol of CMTask::do_marking_step works. 2216 _cm->set_phase(_active_workers, false /* concurrent */); 2217 _g1h->set_par_threads(_active_workers); 2218 _workers->run_task(&proc_task_proxy); 2219 _g1h->set_par_threads(0); 2220 } 2221 2222 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2223 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2224 EnqueueTask& _enq_task; 2225 2226 public: 2227 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2228 AbstractGangTask("Enqueue reference objects in parallel"), 2229 _enq_task(enq_task) { } 2230 2231 virtual void work(uint worker_id) { 2232 _enq_task.work(worker_id); 2233 } 2234 }; 2235 2236 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2237 assert(_workers != NULL, "Need parallel worker threads."); 2238 2239 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2240 2241 _g1h->set_par_threads(_active_workers); 2242 _workers->run_task(&enq_task_proxy); 2243 _g1h->set_par_threads(0); 2244 } 2245 2246 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2247 ResourceMark rm; 2248 HandleMark hm; 2249 2250 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2251 2252 // Is alive closure. 2253 G1CMIsAliveClosure g1_is_alive(g1h); 2254 2255 // Inner scope to exclude the cleaning of the string and symbol 2256 // tables from the displayed time. 
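  // Depending on whether the reference processor was set up for MT
  // processing, the block below either hands the parallel task executor
  // (and the per-task keep-alive/drain closures) to
  // process_discovered_references(), or runs the serial
  // G1CMKeepAliveClosure path on this thread alone.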
2257 { 2258 if (G1Log::finer()) { 2259 gclog_or_tty->put(' '); 2260 } 2261 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty); 2262 2263 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2264 2265 // See the comment in G1CollectedHeap::ref_processing_init() 2266 // about how reference processing currently works in G1. 2267 2268 // Process weak references. 2269 rp->setup_policy(clear_all_soft_refs); 2270 assert(_markStack.isEmpty(), "mark stack should be empty"); 2271 2272 G1CMKeepAliveClosure g1_keep_alive(g1h, this); 2273 G1CMDrainMarkingStackClosure 2274 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive); 2275 2276 // We use the work gang from the G1CollectedHeap and we utilize all 2277 // the worker threads. 2278 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U; 2279 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U); 2280 2281 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2282 g1h->workers(), active_workers); 2283 2284 if (rp->processing_is_mt()) { 2285 // Set the degree of MT here. If the discovery is done MT, there 2286 // may have been a different number of threads doing the discovery 2287 // and a different number of discovered lists may have Ref objects. 2288 // That is OK as long as the Reference lists are balanced (see 2289 // balance_all_queues() and balance_queues()). 2290 rp->set_active_mt_degree(active_workers); 2291 2292 rp->process_discovered_references(&g1_is_alive, 2293 &g1_keep_alive, 2294 &g1_drain_mark_stack, 2295 &par_task_executor); 2296 2297 // The work routines of the parallel keep_alive and drain_marking_stack 2298 // will set the has_overflown flag if we overflow the global marking 2299 // stack. 2300 } else { 2301 rp->process_discovered_references(&g1_is_alive, 2302 &g1_keep_alive, 2303 &g1_drain_mark_stack, 2304 NULL); 2305 } 2306 2307 assert(_markStack.overflow() || _markStack.isEmpty(), 2308 "mark stack should be empty (unless it overflowed)"); 2309 if (_markStack.overflow()) { 2310 // Should have been done already when we tried to push an 2311 // entry on to the global mark stack. But let's do it again. 2312 set_has_overflown(); 2313 } 2314 2315 if (rp->processing_is_mt()) { 2316 assert(rp->num_q() == active_workers, "why not"); 2317 rp->enqueue_discovered_references(&par_task_executor); 2318 } else { 2319 rp->enqueue_discovered_references(); 2320 } 2321 2322 rp->verify_no_references_recorded(); 2323 assert(!rp->discovery_enabled(), "Post condition"); 2324 } 2325 2326 // Now clean up stale oops in StringTable 2327 StringTable::unlink(&g1_is_alive); 2328 // Clean up unreferenced symbols in symbol table. 2329 SymbolTable::unlink(); 2330 } 2331 2332 void ConcurrentMark::swapMarkBitMaps() { 2333 CMBitMapRO* temp = _prevMarkBitMap; 2334 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2335 _nextMarkBitMap = (CMBitMap*) temp; 2336 } 2337 2338 class CMRemarkTask: public AbstractGangTask { 2339 private: 2340 ConcurrentMark *_cm; 2341 2342 public: 2343 void work(uint worker_id) { 2344 // Since all available tasks are actually started, we should 2345 // only proceed if we're supposed to be actived. 2346 if (worker_id < _cm->active_tasks()) { 2347 CMTask* task = _cm->task(worker_id); 2348 task->record_start_time(); 2349 do { 2350 task->do_marking_step(1000000000.0 /* something very large */, 2351 true /* do_stealing */, 2352 true /* do_termination */); 2353 } while (task->has_aborted() && !_cm->has_overflown()); 2354 // If we overflow, then we do not want to restart. 
      // We instead want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its value originally calculated
    // in the ConcurrentMark constructor and pass values of the active
    // workers through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers);
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    uint active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
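    // In the serial case there is no work gang, so we simply run the task
    // body on this (VM) thread as worker 0.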
2402 remarkTask.work(0); 2403 } 2404 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2405 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2406 2407 print_stats(); 2408 2409 #if VERIFY_OBJS_PROCESSED 2410 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2411 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2412 _scan_obj_cl.objs_processed, 2413 ThreadLocalObjQueue::objs_enqueued); 2414 guarantee(_scan_obj_cl.objs_processed == 2415 ThreadLocalObjQueue::objs_enqueued, 2416 "Different number of objs processed and enqueued."); 2417 } 2418 #endif 2419 } 2420 2421 #ifndef PRODUCT 2422 2423 class PrintReachableOopClosure: public OopClosure { 2424 private: 2425 G1CollectedHeap* _g1h; 2426 outputStream* _out; 2427 VerifyOption _vo; 2428 bool _all; 2429 2430 public: 2431 PrintReachableOopClosure(outputStream* out, 2432 VerifyOption vo, 2433 bool all) : 2434 _g1h(G1CollectedHeap::heap()), 2435 _out(out), _vo(vo), _all(all) { } 2436 2437 void do_oop(narrowOop* p) { do_oop_work(p); } 2438 void do_oop( oop* p) { do_oop_work(p); } 2439 2440 template <class T> void do_oop_work(T* p) { 2441 oop obj = oopDesc::load_decode_heap_oop(p); 2442 const char* str = NULL; 2443 const char* str2 = ""; 2444 2445 if (obj == NULL) { 2446 str = ""; 2447 } else if (!_g1h->is_in_g1_reserved(obj)) { 2448 str = " O"; 2449 } else { 2450 HeapRegion* hr = _g1h->heap_region_containing(obj); 2451 guarantee(hr != NULL, "invariant"); 2452 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2453 bool marked = _g1h->is_marked(obj, _vo); 2454 2455 if (over_tams) { 2456 str = " >"; 2457 if (marked) { 2458 str2 = " AND MARKED"; 2459 } 2460 } else if (marked) { 2461 str = " M"; 2462 } else { 2463 str = " NOT"; 2464 } 2465 } 2466 2467 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2468 p, (void*) obj, str, str2); 2469 } 2470 }; 2471 2472 class PrintReachableObjectClosure : public ObjectClosure { 2473 private: 2474 G1CollectedHeap* _g1h; 2475 outputStream* _out; 2476 VerifyOption _vo; 2477 bool _all; 2478 HeapRegion* _hr; 2479 2480 public: 2481 PrintReachableObjectClosure(outputStream* out, 2482 VerifyOption vo, 2483 bool all, 2484 HeapRegion* hr) : 2485 _g1h(G1CollectedHeap::heap()), 2486 _out(out), _vo(vo), _all(all), _hr(hr) { } 2487 2488 void do_object(oop o) { 2489 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2490 bool marked = _g1h->is_marked(o, _vo); 2491 bool print_it = _all || over_tams || marked; 2492 2493 if (print_it) { 2494 _out->print_cr(" "PTR_FORMAT"%s", 2495 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2496 PrintReachableOopClosure oopCl(_out, _vo, _all); 2497 o->oop_iterate(&oopCl); 2498 } 2499 } 2500 }; 2501 2502 class PrintReachableRegionClosure : public HeapRegionClosure { 2503 private: 2504 G1CollectedHeap* _g1h; 2505 outputStream* _out; 2506 VerifyOption _vo; 2507 bool _all; 2508 2509 public: 2510 bool doHeapRegion(HeapRegion* hr) { 2511 HeapWord* b = hr->bottom(); 2512 HeapWord* e = hr->end(); 2513 HeapWord* t = hr->top(); 2514 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2515 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2516 "TAMS: "PTR_FORMAT, b, e, t, p); 2517 _out->cr(); 2518 2519 HeapWord* from = b; 2520 HeapWord* to = t; 2521 2522 if (to > from) { 2523 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2524 _out->cr(); 2525 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2526 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2527 _out->cr(); 2528 } 2529 2530 return false; 2531 } 2532 2533 PrintReachableRegionClosure(outputStream* out, 2534 VerifyOption vo, 2535 bool all) : 2536 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2537 }; 2538 2539 void ConcurrentMark::print_reachable(const char* str, 2540 VerifyOption vo, 2541 bool all) { 2542 gclog_or_tty->cr(); 2543 gclog_or_tty->print_cr("== Doing heap dump... "); 2544 2545 if (G1PrintReachableBaseFile == NULL) { 2546 gclog_or_tty->print_cr(" #### error: no base file defined"); 2547 return; 2548 } 2549 2550 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2551 (JVM_MAXPATHLEN - 1)) { 2552 gclog_or_tty->print_cr(" #### error: file name too long"); 2553 return; 2554 } 2555 2556 char file_name[JVM_MAXPATHLEN]; 2557 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2558 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2559 2560 fileStream fout(file_name); 2561 if (!fout.is_open()) { 2562 gclog_or_tty->print_cr(" #### error: could not open file"); 2563 return; 2564 } 2565 2566 outputStream* out = &fout; 2567 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2568 out->cr(); 2569 2570 out->print_cr("--- ITERATING OVER REGIONS"); 2571 out->cr(); 2572 PrintReachableRegionClosure rcl(out, vo, all); 2573 _g1h->heap_region_iterate(&rcl); 2574 out->cr(); 2575 2576 gclog_or_tty->print_cr(" done"); 2577 gclog_or_tty->flush(); 2578 } 2579 2580 #endif // PRODUCT 2581 2582 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2583 // Note we are overriding the read-only view of the prev map here, via 2584 // the cast. 2585 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2586 } 2587 2588 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2589 _nextMarkBitMap->clearRange(mr); 2590 } 2591 2592 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2593 clearRangePrevBitmap(mr); 2594 clearRangeNextBitmap(mr); 2595 } 2596 2597 HeapRegion* 2598 ConcurrentMark::claim_region(int task_num) { 2599 // "checkpoint" the finger 2600 HeapWord* finger = _finger; 2601 2602 // _heap_end will not change underneath our feet; it only changes at 2603 // yield points. 2604 while (finger < _heap_end) { 2605 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2606 2607 // Note on how this code handles humongous regions. In the 2608 // normal case the finger will reach the start of a "starts 2609 // humongous" (SH) region. Its end will either be the end of the 2610 // last "continues humongous" (CH) region in the sequence, or the 2611 // standard end of the SH region (if the SH is the only region in 2612 // the sequence). 
    // That way claim_region() will skip over the CH regions. However,
    // there is a subtle race between a CM thread executing this method
    // and a mutator thread doing a humongous object allocation. The two
    // are not mutually exclusive as the CM thread does not need to hold
    // the Heap_lock when it gets here. So there is a chance that
    // claim_region() will come across a free region that's in the
    // process of becoming a SH or a CH region. In the former case, it
    // will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord*   bottom      = curr_region->bottom();
    HeapWord*   end         = curr_region->end();
    HeapWord*   limit       = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
2671 return NULL; 2672 } 2673 } else { 2674 assert(_finger > finger, "the finger should have moved forward"); 2675 if (verbose_low()) { 2676 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2677 "global finger = "PTR_FORMAT", " 2678 "our finger = "PTR_FORMAT, 2679 task_num, _finger, finger); 2680 } 2681 2682 // read it again 2683 finger = _finger; 2684 } 2685 } 2686 2687 return NULL; 2688 } 2689 2690 #ifndef PRODUCT 2691 enum VerifyNoCSetOopsPhase { 2692 VerifyNoCSetOopsStack, 2693 VerifyNoCSetOopsQueues, 2694 VerifyNoCSetOopsSATBCompleted, 2695 VerifyNoCSetOopsSATBThread 2696 }; 2697 2698 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2699 private: 2700 G1CollectedHeap* _g1h; 2701 VerifyNoCSetOopsPhase _phase; 2702 int _info; 2703 2704 const char* phase_str() { 2705 switch (_phase) { 2706 case VerifyNoCSetOopsStack: return "Stack"; 2707 case VerifyNoCSetOopsQueues: return "Queue"; 2708 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2709 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2710 default: ShouldNotReachHere(); 2711 } 2712 return NULL; 2713 } 2714 2715 void do_object_work(oop obj) { 2716 guarantee(!_g1h->obj_in_cs(obj), 2717 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2718 (void*) obj, phase_str(), _info)); 2719 } 2720 2721 public: 2722 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2723 2724 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2725 _phase = phase; 2726 _info = info; 2727 } 2728 2729 virtual void do_oop(oop* p) { 2730 oop obj = oopDesc::load_decode_heap_oop(p); 2731 do_object_work(obj); 2732 } 2733 2734 virtual void do_oop(narrowOop* p) { 2735 // We should not come across narrow oops while scanning marking 2736 // stacks and SATB buffers. 2737 ShouldNotReachHere(); 2738 } 2739 2740 virtual void do_object(oop obj) { 2741 do_object_work(obj); 2742 } 2743 }; 2744 2745 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2746 bool verify_enqueued_buffers, 2747 bool verify_thread_buffers, 2748 bool verify_fingers) { 2749 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2750 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2751 return; 2752 } 2753 2754 VerifyNoCSetOopsClosure cl; 2755 2756 if (verify_stacks) { 2757 // Verify entries on the global mark stack 2758 cl.set_phase(VerifyNoCSetOopsStack); 2759 _markStack.oops_do(&cl); 2760 2761 // Verify entries on the task queues 2762 for (int i = 0; i < (int) _max_task_num; i += 1) { 2763 cl.set_phase(VerifyNoCSetOopsQueues, i); 2764 OopTaskQueue* queue = _task_queues->queue(i); 2765 queue->oops_do(&cl); 2766 } 2767 } 2768 2769 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2770 2771 // Verify entries on the enqueued SATB buffers 2772 if (verify_enqueued_buffers) { 2773 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2774 satb_qs.iterate_completed_buffers_read_only(&cl); 2775 } 2776 2777 // Verify entries on the per-thread SATB buffers 2778 if (verify_thread_buffers) { 2779 cl.set_phase(VerifyNoCSetOopsSATBThread); 2780 satb_qs.iterate_thread_buffers_read_only(&cl); 2781 } 2782 2783 if (verify_fingers) { 2784 // Verify the global finger 2785 HeapWord* global_finger = finger(); 2786 if (global_finger != NULL && global_finger < _heap_end) { 2787 // The global finger always points to a heap region boundary. 
We 2788 // use heap_region_containing_raw() to get the containing region 2789 // given that the global finger could be pointing to a free region 2790 // which subsequently becomes continues humongous. If that 2791 // happens, heap_region_containing() will return the bottom of the 2792 // corresponding starts humongous region and the check below will 2793 // not hold any more. 2794 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2795 guarantee(global_finger == global_hr->bottom(), 2796 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2797 global_finger, HR_FORMAT_PARAMS(global_hr))); 2798 } 2799 2800 // Verify the task fingers 2801 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2802 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2803 CMTask* task = _tasks[i]; 2804 HeapWord* task_finger = task->finger(); 2805 if (task_finger != NULL && task_finger < _heap_end) { 2806 // See above note on the global finger verification. 2807 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2808 guarantee(task_finger == task_hr->bottom() || 2809 !task_hr->in_collection_set(), 2810 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2811 task_finger, HR_FORMAT_PARAMS(task_hr))); 2812 } 2813 } 2814 } 2815 } 2816 #endif // PRODUCT 2817 2818 void ConcurrentMark::clear_marking_state(bool clear_overflow) { 2819 _markStack.setEmpty(); 2820 _markStack.clear_overflow(); 2821 if (clear_overflow) { 2822 clear_has_overflown(); 2823 } else { 2824 assert(has_overflown(), "pre-condition"); 2825 } 2826 _finger = _heap_start; 2827 2828 for (int i = 0; i < (int)_max_task_num; ++i) { 2829 OopTaskQueue* queue = _task_queues->queue(i); 2830 queue->set_empty(); 2831 } 2832 } 2833 2834 // Aggregate the counting data that was constructed concurrently 2835 // with marking. 2836 class AggregateCountDataHRClosure: public HeapRegionClosure { 2837 ConcurrentMark* _cm; 2838 BitMap* _cm_card_bm; 2839 size_t _max_task_num; 2840 2841 public: 2842 AggregateCountDataHRClosure(ConcurrentMark *cm, 2843 BitMap* cm_card_bm, 2844 size_t max_task_num) : 2845 _cm(cm), _cm_card_bm(cm_card_bm), 2846 _max_task_num(max_task_num) { } 2847 2848 bool is_card_aligned(HeapWord* p) { 2849 return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0); 2850 } 2851 2852 bool doHeapRegion(HeapRegion* hr) { 2853 if (hr->continuesHumongous()) { 2854 // We will ignore these here and process them when their 2855 // associated "starts humongous" region is processed. 2856 // Note that we cannot rely on their associated 2857 // "starts humongous" region to have their bit set to 1 2858 // since, due to the region chunking in the parallel region 2859 // iteration, a "continues humongous" region might be visited 2860 // before its associated "starts humongous". 2861 return false; 2862 } 2863 2864 HeapWord* start = hr->bottom(); 2865 HeapWord* limit = hr->next_top_at_mark_start(); 2866 HeapWord* end = hr->end(); 2867 2868 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2869 err_msg("Preconditions not met - " 2870 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2871 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2872 start, limit, hr->top(), hr->end())); 2873 2874 assert(hr->next_marked_bytes() == 0, "Precondition"); 2875 2876 if (start == limit) { 2877 // NTAMS of this region has not been set so nothing to do. 
2878 return false; 2879 } 2880 2881 assert(is_card_aligned(start), "sanity"); 2882 assert(is_card_aligned(end), "sanity"); 2883 2884 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 2885 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 2886 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 2887 2888 // If ntams is not card aligned then we bump the index for 2889 // limit so that we get the card spanning ntams. 2890 if (!is_card_aligned(limit)) { 2891 limit_idx += 1; 2892 } 2893 2894 assert(limit_idx <= end_idx, "or else use atomics"); 2895 2896 // Aggregate the "stripe" in the count data associated with hr. 2897 uint hrs_index = hr->hrs_index(); 2898 size_t marked_bytes = 0; 2899 2900 for (int i = 0; (size_t)i < _max_task_num; i += 1) { 2901 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2902 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2903 2904 // Fetch the marked_bytes in this region for task i and 2905 // add it to the running total for this region. 2906 marked_bytes += marked_bytes_array[hrs_index]; 2907 2908 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx) 2909 // into the global card bitmap. 2910 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2911 2912 while (scan_idx < limit_idx) { 2913 assert(task_card_bm->at(scan_idx) == true, "should be"); 2914 _cm_card_bm->set_bit(scan_idx); 2915 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2916 2917 // BitMap::get_next_one_offset() can handle the case when 2918 // its left_offset parameter is greater than its right_offset 2919 // parameter. If does, however, have an early exit if 2920 // left_offset == right_offset. So let's limit the value 2921 // passed in for left offset here. 2922 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2923 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2924 } 2925 } 2926 2927 // Update the marked bytes for this region. 2928 hr->add_to_marked_bytes(marked_bytes); 2929 2930 // Next heap region 2931 return false; 2932 } 2933 }; 2934 2935 class G1AggregateCountDataTask: public AbstractGangTask { 2936 protected: 2937 G1CollectedHeap* _g1h; 2938 ConcurrentMark* _cm; 2939 BitMap* _cm_card_bm; 2940 size_t _max_task_num; 2941 int _active_workers; 2942 2943 public: 2944 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2945 ConcurrentMark* cm, 2946 BitMap* cm_card_bm, 2947 size_t max_task_num, 2948 int n_workers) : 2949 AbstractGangTask("Count Aggregation"), 2950 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2951 _max_task_num(max_task_num), 2952 _active_workers(n_workers) { } 2953 2954 void work(uint worker_id) { 2955 AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num); 2956 2957 if (G1CollectedHeap::use_parallel_gc_threads()) { 2958 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 2959 _active_workers, 2960 HeapRegion::AggregateCountClaimValue); 2961 } else { 2962 _g1h->heap_region_iterate(&cl); 2963 } 2964 } 2965 }; 2966 2967 2968 void ConcurrentMark::aggregate_count_data() { 2969 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 
2970 _g1h->workers()->active_workers() : 2971 1); 2972 2973 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2974 _max_task_num, n_workers); 2975 2976 if (G1CollectedHeap::use_parallel_gc_threads()) { 2977 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2978 "sanity check"); 2979 _g1h->set_par_threads(n_workers); 2980 _g1h->workers()->run_task(&g1_par_agg_task); 2981 _g1h->set_par_threads(0); 2982 2983 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 2984 "sanity check"); 2985 _g1h->reset_heap_region_claim_values(); 2986 } else { 2987 g1_par_agg_task.work(0); 2988 } 2989 } 2990 2991 // Clear the per-worker arrays used to store the per-region counting data 2992 void ConcurrentMark::clear_all_count_data() { 2993 // Clear the global card bitmap - it will be filled during 2994 // liveness count aggregation (during remark) and the 2995 // final counting task. 2996 _card_bm.clear(); 2997 2998 // Clear the global region bitmap - it will be filled as part 2999 // of the final counting task. 3000 _region_bm.clear(); 3001 3002 uint max_regions = _g1h->max_regions(); 3003 assert(_max_task_num != 0, "unitialized"); 3004 3005 for (int i = 0; (size_t) i < _max_task_num; i += 1) { 3006 BitMap* task_card_bm = count_card_bitmap_for(i); 3007 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3008 3009 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3010 assert(marked_bytes_array != NULL, "uninitialized"); 3011 3012 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3013 task_card_bm->clear(); 3014 } 3015 } 3016 3017 void ConcurrentMark::print_stats() { 3018 if (verbose_stats()) { 3019 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3020 for (size_t i = 0; i < _active_tasks; ++i) { 3021 _tasks[i]->print_stats(); 3022 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3023 } 3024 } 3025 } 3026 3027 // abandon current marking iteration due to a Full GC 3028 void ConcurrentMark::abort() { 3029 // Clear all marks to force marking thread to do nothing 3030 _nextMarkBitMap->clearAll(); 3031 // Clear the liveness counting data 3032 clear_all_count_data(); 3033 // Empty mark stack 3034 clear_marking_state(); 3035 for (int i = 0; i < (int)_max_task_num; ++i) { 3036 _tasks[i]->clear_region_fields(); 3037 } 3038 _has_aborted = true; 3039 3040 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3041 satb_mq_set.abandon_partial_marking(); 3042 // This can be called either during or outside marking, we'll read 3043 // the expected_active value from the SATB queue set. 3044 satb_mq_set.set_active_all_threads( 3045 false, /* new active value */ 3046 satb_mq_set.is_active() /* expected_active */); 3047 } 3048 3049 static void print_ms_time_info(const char* prefix, const char* name, 3050 NumberSeq& ns) { 3051 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3052 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3053 if (ns.num() > 0) { 3054 gclog_or_tty->print_cr("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 3055 prefix, ns.sd(), ns.maximum()); 3056 } 3057 } 3058 3059 void ConcurrentMark::print_summary_info() { 3060 gclog_or_tty->print_cr(" Concurrent marking:"); 3061 print_ms_time_info(" ", "init marks", _init_times); 3062 print_ms_time_info(" ", "remarks", _remark_times); 3063 { 3064 print_ms_time_info(" ", "final marks", _remark_mark_times); 3065 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3066 3067 } 3068 print_ms_time_info(" ", "cleanups", _cleanup_times); 3069 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3070 _total_counting_time, 3071 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3072 (double)_cleanup_times.num() 3073 : 0.0)); 3074 if (G1ScrubRemSets) { 3075 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3076 _total_rs_scrub_time, 3077 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3078 (double)_cleanup_times.num() 3079 : 0.0)); 3080 } 3081 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3082 (_init_times.sum() + _remark_times.sum() + 3083 _cleanup_times.sum())/1000.0); 3084 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3085 "(%8.2f s marking).", 3086 cmThread()->vtime_accum(), 3087 cmThread()->vtime_mark_accum()); 3088 } 3089 3090 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3091 _parallel_workers->print_worker_threads_on(st); 3092 } 3093 3094 // We take a break if someone is trying to stop the world. 3095 bool ConcurrentMark::do_yield_check(uint worker_id) { 3096 if (should_yield()) { 3097 if (worker_id == 0) { 3098 _g1h->g1_policy()->record_concurrent_pause(); 3099 } 3100 cmThread()->yield(); 3101 return true; 3102 } else { 3103 return false; 3104 } 3105 } 3106 3107 bool ConcurrentMark::should_yield() { 3108 return cmThread()->should_yield(); 3109 } 3110 3111 bool ConcurrentMark::containing_card_is_marked(void* p) { 3112 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3113 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3114 } 3115 3116 bool ConcurrentMark::containing_cards_are_marked(void* start, 3117 void* last) { 3118 return containing_card_is_marked(start) && 3119 containing_card_is_marked(last); 3120 } 3121 3122 #ifndef PRODUCT 3123 // for debugging purposes 3124 void ConcurrentMark::print_finger() { 3125 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3126 _heap_start, _heap_end, _finger); 3127 for (int i = 0; i < (int) _max_task_num; ++i) { 3128 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3129 } 3130 gclog_or_tty->print_cr(""); 3131 } 3132 #endif 3133 3134 void CMTask::scan_object(oop obj) { 3135 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3136 3137 if (_cm->verbose_high()) { 3138 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3139 _task_id, (void*) obj); 3140 } 3141 3142 size_t obj_size = obj->size(); 3143 _words_scanned += obj_size; 3144 3145 obj->oop_iterate(_cm_oop_closure); 3146 statsOnly( ++_objs_scanned ); 3147 check_limits(); 3148 } 3149 3150 // Closure for iteration over bitmaps 3151 class CMBitMapClosure : public BitMapClosure { 3152 private: 3153 // the bitmap that is being iterated over 3154 CMBitMap* _nextMarkBitMap; 3155 ConcurrentMark* _cm; 3156 CMTask* _task; 3157 3158 public: 3159 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3160 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3161 3162 bool 
do_bit(size_t offset) { 3163 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3164 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3165 assert( addr < _cm->finger(), "invariant"); 3166 3167 statsOnly( _task->increase_objs_found_on_bitmap() ); 3168 assert(addr >= _task->finger(), "invariant"); 3169 3170 // We move that task's local finger along. 3171 _task->move_finger_to(addr); 3172 3173 _task->scan_object(oop(addr)); 3174 // we only partially drain the local queue and global stack 3175 _task->drain_local_queue(true); 3176 _task->drain_global_stack(true); 3177 3178 // if the has_aborted flag has been raised, we need to bail out of 3179 // the iteration 3180 return !_task->has_aborted(); 3181 } 3182 }; 3183 3184 // Closure for iterating over objects, currently only used for 3185 // processing SATB buffers. 3186 class CMObjectClosure : public ObjectClosure { 3187 private: 3188 CMTask* _task; 3189 3190 public: 3191 void do_object(oop obj) { 3192 _task->deal_with_reference(obj); 3193 } 3194 3195 CMObjectClosure(CMTask* task) : _task(task) { } 3196 }; 3197 3198 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3199 ConcurrentMark* cm, 3200 CMTask* task) 3201 : _g1h(g1h), _cm(cm), _task(task) { 3202 assert(_ref_processor == NULL, "should be initialized to NULL"); 3203 3204 if (G1UseConcMarkReferenceProcessing) { 3205 _ref_processor = g1h->ref_processor_cm(); 3206 assert(_ref_processor != NULL, "should not be NULL"); 3207 } 3208 } 3209 3210 void CMTask::setup_for_region(HeapRegion* hr) { 3211 // Separated the asserts so that we know which one fires. 3212 assert(hr != NULL, 3213 "claim_region() should have filtered out continues humongous regions"); 3214 assert(!hr->continuesHumongous(), 3215 "claim_region() should have filtered out continues humongous regions"); 3216 3217 if (_cm->verbose_low()) { 3218 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 3219 _task_id, hr); 3220 } 3221 3222 _curr_region = hr; 3223 _finger = hr->bottom(); 3224 update_region_limit(); 3225 } 3226 3227 void CMTask::update_region_limit() { 3228 HeapRegion* hr = _curr_region; 3229 HeapWord* bottom = hr->bottom(); 3230 HeapWord* limit = hr->next_top_at_mark_start(); 3231 3232 if (limit == bottom) { 3233 if (_cm->verbose_low()) { 3234 gclog_or_tty->print_cr("[%d] found an empty region " 3235 "["PTR_FORMAT", "PTR_FORMAT")", 3236 _task_id, bottom, limit); 3237 } 3238 // The region was collected underneath our feet. 3239 // We set the finger to bottom to ensure that the bitmap 3240 // iteration that will follow this will not do anything. 3241 // (this is not a condition that holds when we set the region up, 3242 // as the region is not supposed to be empty in the first place) 3243 _finger = bottom; 3244 } else if (limit >= _region_limit) { 3245 assert(limit >= _finger, "peace of mind"); 3246 } else { 3247 assert(limit < _region_limit, "only way to get here"); 3248 // This can happen under some pretty unusual circumstances. An 3249 // evacuation pause empties the region underneath our feet (NTAMS 3250 // at bottom). We then do some allocation in the region (NTAMS 3251 // stays at bottom), followed by the region being used as a GC 3252 // alloc region (NTAMS will move to top() and the objects 3253 // originally below it will be grayed). All objects now marked in 3254 // the region are explicitly grayed, if below the global finger, 3255 // and we do not need in fact to scan anything else. 
So, we simply 3256 // set _finger to be limit to ensure that the bitmap iteration 3257 // doesn't do anything. 3258 _finger = limit; 3259 } 3260 3261 _region_limit = limit; 3262 } 3263 3264 void CMTask::giveup_current_region() { 3265 assert(_curr_region != NULL, "invariant"); 3266 if (_cm->verbose_low()) { 3267 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 3268 _task_id, _curr_region); 3269 } 3270 clear_region_fields(); 3271 } 3272 3273 void CMTask::clear_region_fields() { 3274 // Values for these three fields that indicate that we're not 3275 // holding on to a region. 3276 _curr_region = NULL; 3277 _finger = NULL; 3278 _region_limit = NULL; 3279 } 3280 3281 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3282 if (cm_oop_closure == NULL) { 3283 assert(_cm_oop_closure != NULL, "invariant"); 3284 } else { 3285 assert(_cm_oop_closure == NULL, "invariant"); 3286 } 3287 _cm_oop_closure = cm_oop_closure; 3288 } 3289 3290 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3291 guarantee(nextMarkBitMap != NULL, "invariant"); 3292 3293 if (_cm->verbose_low()) { 3294 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3295 } 3296 3297 _nextMarkBitMap = nextMarkBitMap; 3298 clear_region_fields(); 3299 3300 _calls = 0; 3301 _elapsed_time_ms = 0.0; 3302 _termination_time_ms = 0.0; 3303 _termination_start_time_ms = 0.0; 3304 3305 #if _MARKING_STATS_ 3306 _local_pushes = 0; 3307 _local_pops = 0; 3308 _local_max_size = 0; 3309 _objs_scanned = 0; 3310 _global_pushes = 0; 3311 _global_pops = 0; 3312 _global_max_size = 0; 3313 _global_transfers_to = 0; 3314 _global_transfers_from = 0; 3315 _regions_claimed = 0; 3316 _objs_found_on_bitmap = 0; 3317 _satb_buffers_processed = 0; 3318 _steal_attempts = 0; 3319 _steals = 0; 3320 _aborted = 0; 3321 _aborted_overflow = 0; 3322 _aborted_cm_aborted = 0; 3323 _aborted_yield = 0; 3324 _aborted_timed_out = 0; 3325 _aborted_satb = 0; 3326 _aborted_termination = 0; 3327 #endif // _MARKING_STATS_ 3328 } 3329 3330 bool CMTask::should_exit_termination() { 3331 regular_clock_call(); 3332 // This is called when we are in the termination protocol. We should 3333 // quit if, for some reason, this task wants to abort or the global 3334 // stack is not empty (this means that we can get work from it). 3335 return !_cm->mark_stack_empty() || has_aborted(); 3336 } 3337 3338 void CMTask::reached_limit() { 3339 assert(_words_scanned >= _words_scanned_limit || 3340 _refs_reached >= _refs_reached_limit , 3341 "shouldn't have been called otherwise"); 3342 regular_clock_call(); 3343 } 3344 3345 void CMTask::regular_clock_call() { 3346 if (has_aborted()) return; 3347 3348 // First, we need to recalculate the words scanned and refs reached 3349 // limits for the next clock call. 3350 recalculate_limits(); 3351 3352 // During the regular clock call we do the following 3353 3354 // (1) If an overflow has been flagged, then we abort. 3355 if (_cm->has_overflown()) { 3356 set_has_aborted(); 3357 return; 3358 } 3359 3360 // If we are not concurrent (i.e. we're doing remark) we don't need 3361 // to check anything else. The other steps are only needed during 3362 // the concurrent marking phase. 3363 if (!concurrent()) return; 3364 3365 // (2) If marking has been aborted for Full GC, then we also abort. 3366 if (_cm->has_aborted()) { 3367 set_has_aborted(); 3368 statsOnly( ++_aborted_cm_aborted ); 3369 return; 3370 } 3371 3372 double curr_time_ms = os::elapsedVTime() * 1000.0; 3373 3374 // (3) If marking stats are enabled, then we update the step history. 
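  // (The block below is compiled in only when _MARKING_STATS_ is enabled;
  // it records whether the words-scanned or the refs-reached limit
  // triggered this clock call and tracks the interval since the last one.)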
3375 #if _MARKING_STATS_ 3376 if (_words_scanned >= _words_scanned_limit) { 3377 ++_clock_due_to_scanning; 3378 } 3379 if (_refs_reached >= _refs_reached_limit) { 3380 ++_clock_due_to_marking; 3381 } 3382 3383 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 3384 _interval_start_time_ms = curr_time_ms; 3385 _all_clock_intervals_ms.add(last_interval_ms); 3386 3387 if (_cm->verbose_medium()) { 3388 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 3389 "scanned = %d%s, refs reached = %d%s", 3390 _task_id, last_interval_ms, 3391 _words_scanned, 3392 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 3393 _refs_reached, 3394 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 3395 } 3396 #endif // _MARKING_STATS_ 3397 3398 // (4) We check whether we should yield. If we have to, then we abort. 3399 if (_cm->should_yield()) { 3400 // We should yield. To do this we abort the task. The caller is 3401 // responsible for yielding. 3402 set_has_aborted(); 3403 statsOnly( ++_aborted_yield ); 3404 return; 3405 } 3406 3407 // (5) We check whether we've reached our time quota. If we have, 3408 // then we abort. 3409 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3410 if (elapsed_time_ms > _time_target_ms) { 3411 set_has_aborted(); 3412 _has_timed_out = true; 3413 statsOnly( ++_aborted_timed_out ); 3414 return; 3415 } 3416 3417 // (6) Finally, we check whether there are enough completed SATB 3418 // buffers available for processing. If there are, we abort. 3419 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3420 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3421 if (_cm->verbose_low()) { 3422 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 3423 _task_id); 3424 } 3425 // we do need to process SATB buffers, so we'll abort and restart 3426 // the marking task to do so 3427 set_has_aborted(); 3428 statsOnly( ++_aborted_satb ); 3429 return; 3430 } 3431 } 3432 3433 void CMTask::recalculate_limits() { 3434 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3435 _words_scanned_limit = _real_words_scanned_limit; 3436 3437 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3438 _refs_reached_limit = _real_refs_reached_limit; 3439 } 3440 3441 void CMTask::decrease_limits() { 3442 // This is called when we believe that we're going to do an infrequent 3443 // operation which will increase the per-byte scanned cost (i.e. move 3444 // entries to/from the global stack). It basically tries to decrease the 3445 // scanning limit so that the clock is called earlier.
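  // Illustratively: recalculate_limits() sets the real limits one full period
  // past the point where they were last recomputed, so subtracting three
  // quarters of a period below leaves the effective limits only a quarter of
  // a period past that point, and regular_clock_call() fires much sooner.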
3446 3447 if (_cm->verbose_medium()) { 3448 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 3449 } 3450 3451 _words_scanned_limit = _real_words_scanned_limit - 3452 3 * words_scanned_period / 4; 3453 _refs_reached_limit = _real_refs_reached_limit - 3454 3 * refs_reached_period / 4; 3455 } 3456 3457 void CMTask::move_entries_to_global_stack() { 3458 // local array where we'll store the entries that will be popped 3459 // from the local queue 3460 oop buffer[global_stack_transfer_size]; 3461 3462 int n = 0; 3463 oop obj; 3464 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3465 buffer[n] = obj; 3466 ++n; 3467 } 3468 3469 if (n > 0) { 3470 // we popped at least one entry from the local queue 3471 3472 statsOnly( ++_global_transfers_to; _local_pops += n ); 3473 3474 if (!_cm->mark_stack_push(buffer, n)) { 3475 if (_cm->verbose_low()) { 3476 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", 3477 _task_id); 3478 } 3479 set_has_aborted(); 3480 } else { 3481 // the transfer was successful 3482 3483 if (_cm->verbose_medium()) { 3484 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 3485 _task_id, n); 3486 } 3487 statsOnly( int tmp_size = _cm->mark_stack_size(); 3488 if (tmp_size > _global_max_size) { 3489 _global_max_size = tmp_size; 3490 } 3491 _global_pushes += n ); 3492 } 3493 } 3494 3495 // this operation was quite expensive, so decrease the limits 3496 decrease_limits(); 3497 } 3498 3499 void CMTask::get_entries_from_global_stack() { 3500 // local array where we'll store the entries that will be popped 3501 // from the global stack. 3502 oop buffer[global_stack_transfer_size]; 3503 int n; 3504 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3505 assert(n <= global_stack_transfer_size, 3506 "we should not pop more than the given limit"); 3507 if (n > 0) { 3508 // yes, we did actually pop at least one entry 3509 3510 statsOnly( ++_global_transfers_from; _global_pops += n ); 3511 if (_cm->verbose_medium()) { 3512 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 3513 _task_id, n); 3514 } 3515 for (int i = 0; i < n; ++i) { 3516 bool success = _task_queue->push(buffer[i]); 3517 // We only call this when the local queue is empty or under a 3518 // given target limit. So, we do not expect this push to fail. 3519 assert(success, "invariant"); 3520 } 3521 3522 statsOnly( int tmp_size = _task_queue->size(); 3523 if (tmp_size > _local_max_size) { 3524 _local_max_size = tmp_size; 3525 } 3526 _local_pushes += n ); 3527 } 3528 3529 // this operation was quite expensive, so decrease the limits 3530 decrease_limits(); 3531 } 3532 3533 void CMTask::drain_local_queue(bool partially) { 3534 if (has_aborted()) return; 3535 3536 // Decide what the target size is, depending whether we're going to 3537 // drain it partially (so that other tasks can steal if they run out 3538 // of things to do) or totally (at the very end). 
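  // Concretely (see below): a partial drain stops once the queue is down to
  // roughly a third of its capacity (capped by GCDrainStackTargetSize), so
  // other tasks can still steal entries; a total drain empties it completely
  // (target size 0).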
3539 size_t target_size; 3540 if (partially) { 3541 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3542 } else { 3543 target_size = 0; 3544 } 3545 3546 if (_task_queue->size() > target_size) { 3547 if (_cm->verbose_high()) { 3548 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", 3549 _task_id, target_size); 3550 } 3551 3552 oop obj; 3553 bool ret = _task_queue->pop_local(obj); 3554 while (ret) { 3555 statsOnly( ++_local_pops ); 3556 3557 if (_cm->verbose_high()) { 3558 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 3559 (void*) obj); 3560 } 3561 3562 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3563 assert(!_g1h->is_on_master_free_list( 3564 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3565 3566 scan_object(obj); 3567 3568 if (_task_queue->size() <= target_size || has_aborted()) { 3569 ret = false; 3570 } else { 3571 ret = _task_queue->pop_local(obj); 3572 } 3573 } 3574 3575 if (_cm->verbose_high()) { 3576 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 3577 _task_id, _task_queue->size()); 3578 } 3579 } 3580 } 3581 3582 void CMTask::drain_global_stack(bool partially) { 3583 if (has_aborted()) return; 3584 3585 // We have a policy to drain the local queue before we attempt to 3586 // drain the global stack. 3587 assert(partially || _task_queue->size() == 0, "invariant"); 3588 3589 // Decide what the target size is, depending whether we're going to 3590 // drain it partially (so that other tasks can steal if they run out 3591 // of things to do) or totally (at the very end). Notice that, 3592 // because we move entries from the global stack in chunks or 3593 // because another task might be doing the same, we might in fact 3594 // drop below the target. But, this is not a problem. 3595 size_t target_size; 3596 if (partially) { 3597 target_size = _cm->partial_mark_stack_size_target(); 3598 } else { 3599 target_size = 0; 3600 } 3601 3602 if (_cm->mark_stack_size() > target_size) { 3603 if (_cm->verbose_low()) { 3604 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", 3605 _task_id, target_size); 3606 } 3607 3608 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3609 get_entries_from_global_stack(); 3610 drain_local_queue(partially); 3611 } 3612 3613 if (_cm->verbose_low()) { 3614 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 3615 _task_id, _cm->mark_stack_size()); 3616 } 3617 } 3618 } 3619 3620 // SATB Queue has several assumptions on whether to call the par or 3621 // non-par versions of the methods. this is why some of the code is 3622 // replicated. We should really get rid of the single-threaded version 3623 // of the code to simplify things. 3624 void CMTask::drain_satb_buffers() { 3625 if (has_aborted()) return; 3626 3627 // We set this so that the regular clock knows that we're in the 3628 // middle of draining buffers and doesn't set the abort flag when it 3629 // notices that SATB buffers are available for draining. It'd be 3630 // very counter productive if it did that. :-) 3631 _draining_satb_buffers = true; 3632 3633 CMObjectClosure oc(this); 3634 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3635 if (G1CollectedHeap::use_parallel_gc_threads()) { 3636 satb_mq_set.set_par_closure(_task_id, &oc); 3637 } else { 3638 satb_mq_set.set_closure(&oc); 3639 } 3640 3641 // This keeps claiming and applying the closure to completed buffers 3642 // until we run out of buffers or we need to abort. 
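  // (The parallel / serial split below mirrors the closure registration above;
  //  as the note before this method says, the single-threaded path only exists
  //  because the SATB queue set still distinguishes the par and non-par cases.)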
3643 if (G1CollectedHeap::use_parallel_gc_threads()) { 3644 while (!has_aborted() && 3645 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3646 if (_cm->verbose_medium()) { 3647 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3648 } 3649 statsOnly( ++_satb_buffers_processed ); 3650 regular_clock_call(); 3651 } 3652 } else { 3653 while (!has_aborted() && 3654 satb_mq_set.apply_closure_to_completed_buffer()) { 3655 if (_cm->verbose_medium()) { 3656 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3657 } 3658 statsOnly( ++_satb_buffers_processed ); 3659 regular_clock_call(); 3660 } 3661 } 3662 3663 if (!concurrent() && !has_aborted()) { 3664 // We should only do this during remark. 3665 if (G1CollectedHeap::use_parallel_gc_threads()) { 3666 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3667 } else { 3668 satb_mq_set.iterate_closure_all_threads(); 3669 } 3670 } 3671 3672 _draining_satb_buffers = false; 3673 3674 assert(has_aborted() || 3675 concurrent() || 3676 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3677 3678 if (G1CollectedHeap::use_parallel_gc_threads()) { 3679 satb_mq_set.set_par_closure(_task_id, NULL); 3680 } else { 3681 satb_mq_set.set_closure(NULL); 3682 } 3683 3684 // again, this was a potentially expensive operation, decrease the 3685 // limits to get the regular clock call early 3686 decrease_limits(); 3687 } 3688 3689 void CMTask::print_stats() { 3690 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 3691 _task_id, _calls); 3692 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3693 _elapsed_time_ms, _termination_time_ms); 3694 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3695 _step_times_ms.num(), _step_times_ms.avg(), 3696 _step_times_ms.sd()); 3697 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3698 _step_times_ms.maximum(), _step_times_ms.sum()); 3699 3700 #if _MARKING_STATS_ 3701 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3702 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3703 _all_clock_intervals_ms.sd()); 3704 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3705 _all_clock_intervals_ms.maximum(), 3706 _all_clock_intervals_ms.sum()); 3707 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3708 _clock_due_to_scanning, _clock_due_to_marking); 3709 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3710 _objs_scanned, _objs_found_on_bitmap); 3711 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3712 _local_pushes, _local_pops, _local_max_size); 3713 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3714 _global_pushes, _global_pops, _global_max_size); 3715 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3716 _global_transfers_to,_global_transfers_from); 3717 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3718 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3719 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3720 _steal_attempts, _steals); 3721 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3722 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3723 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3724 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3725 _aborted_timed_out, _aborted_satb, 
_aborted_termination); 3726 #endif // _MARKING_STATS_ 3727 } 3728 3729 /***************************************************************************** 3730 3731 The do_marking_step(time_target_ms) method is the building block 3732 of the parallel marking framework. It can be called in parallel 3733 with other invocations of do_marking_step() on different tasks 3734 (but only one per task, obviously) and concurrently with the 3735 mutator threads, or during remark, hence it eliminates the need 3736 for two versions of the code. When called during remark, it will 3737 pick up from where the task left off during the concurrent marking 3738 phase. Interestingly, tasks are also claimable during evacuation 3739 pauses, since do_marking_step() ensures that it aborts before 3740 it needs to yield. 3741 3742 The data structures that it uses to do marking work are the 3743 following: 3744 3745 (1) Marking Bitmap. If there are gray objects that appear only 3746 on the bitmap (this happens either when dealing with an overflow 3747 or when the initial marking phase has simply marked the roots 3748 and didn't push them on the stack), then tasks claim heap 3749 regions whose bitmap they then scan to find gray objects. A 3750 global finger indicates where the end of the last claimed region 3751 is. A local finger indicates how far into the region a task has 3752 scanned. The two fingers are used to determine how to gray an 3753 object (i.e. whether simply marking it is OK, as it will be 3754 visited by a task in the future, or whether it also needs to be 3755 pushed on a stack). 3756 3757 (2) Local Queue. The local queue of the task which is accessed 3758 reasonably efficiently by the task. Other tasks can steal from 3759 it when they run out of work. Throughout the marking phase, a 3760 task attempts to keep its local queue short but not totally 3761 empty, so that entries are available for stealing by other 3762 tasks. Only when there is no more work does a task totally 3763 drain its local queue. 3764 3765 (3) Global Mark Stack. This handles local queue overflow. During 3766 marking only sets of entries are moved between it and the local 3767 queues, as access to it requires a mutex and more fine-grained 3768 interaction with it might cause contention. If it 3769 overflows, then the marking phase should restart and iterate 3770 over the bitmap to identify gray objects. Throughout the marking 3771 phase, tasks attempt to keep the global mark stack at a small 3772 length but not totally empty, so that entries are available for 3773 popping by other tasks. Only when there is no more work do tasks 3774 totally drain the global mark stack. 3775 3776 (4) SATB Buffer Queue. This is where completed SATB buffers are 3777 made available. Buffers are regularly removed from this queue 3778 and scanned for roots, so that the queue doesn't get too 3779 long. During remark, all completed buffers are processed, as 3780 well as the filled-in parts of any uncompleted buffers. 3781 3782 The do_marking_step() method tries to abort when the time target 3783 has been reached. There are a few other cases when the 3784 do_marking_step() method also aborts: 3785 3786 (1) When the marking phase has been aborted (after a Full GC). 3787 3788 (2) When a global overflow (on the global stack) has been 3789 triggered. Before the task aborts, it will actually sync up with 3790 the other tasks to ensure that all the marking data structures 3791 (local queues, stacks, fingers etc.)
are re-initialised so that 3792 when do_marking_step() completes, the marking phase can 3793 immediately restart. 3794 3795 (3) When enough completed SATB buffers are available. The 3796 do_marking_step() method only tries to drain SATB buffers right 3797 at the beginning. So, if enough buffers are available, the 3798 marking step aborts and the SATB buffers are processed at 3799 the beginning of the next invocation. 3800 3801 (4) To yield. When we have to yield, we abort and yield 3802 right at the end of do_marking_step(). This saves us from a lot 3803 of hassle as, by yielding, we might allow a Full GC to take place. If this 3804 happens then objects will be compacted underneath our feet, the 3805 heap might shrink, etc. We save checking for this by just 3806 aborting and doing the yield right at the end. 3807 3808 From the above it follows that the do_marking_step() method should 3809 be called in a loop (or, otherwise, regularly) until it completes. 3810 3811 If a marking step completes without its has_aborted() flag being 3812 true, it means it has completed the current marking phase (and 3813 also all other marking tasks have done so and have all synced up). 3814 3815 A method called regular_clock_call() is invoked "regularly" (in 3816 sub-ms intervals) throughout marking. It is this clock method that 3817 checks all the abort conditions which were mentioned above and 3818 decides when the task should abort. A work-based scheme is used to 3819 trigger this clock method: it is called when the number of object words the 3820 marking phase has scanned or the number of references the marking 3821 phase has visited reaches a given limit. Additional invocations of 3822 the clock method have been planted in a few other strategic places 3823 too. The initial reason for the clock method was to avoid calling 3824 vtime too regularly, as it is quite expensive. So, once it was in 3825 place, it was natural to piggy-back all the other conditions on it 3826 too and not constantly check them throughout the code. 3827 3828 *****************************************************************************/ 3829 3830 void CMTask::do_marking_step(double time_target_ms, 3831 bool do_stealing, 3832 bool do_termination) { 3833 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 3834 assert(concurrent() == _cm->concurrent(), "they should be the same"); 3835 3836 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 3837 assert(_task_queues != NULL, "invariant"); 3838 assert(_task_queue != NULL, "invariant"); 3839 assert(_task_queues->queue(_task_id) == _task_queue, "invariant"); 3840 3841 assert(!_claimed, 3842 "only one thread should claim this task at any one time"); 3843 3844 // OK, this doesn't safeguard against all possible scenarios, as it is 3845 // possible for two threads to set the _claimed flag at the same 3846 // time. But it is only for debugging purposes anyway and it will 3847 // catch most problems.
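  // (Illustrative sketch only, not verbatim caller code: as the comment block
  //  above says, a marking worker is expected to drive this method in a loop,
  //  roughly
  //    do {
  //      task->do_marking_step(target_ms, true /* stealing */, true /* termination */);
  //    } while (task->has_aborted() && !cm->has_aborted());
  //  where target_ms is the per-step time budget chosen by the caller.)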
3848 _claimed = true; 3849 3850 _start_time_ms = os::elapsedVTime() * 1000.0; 3851 statsOnly( _interval_start_time_ms = _start_time_ms ); 3852 3853 double diff_prediction_ms = 3854 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 3855 _time_target_ms = time_target_ms - diff_prediction_ms; 3856 3857 // set up the variables that are used in the work-based scheme to 3858 // call the regular clock method 3859 _words_scanned = 0; 3860 _refs_reached = 0; 3861 recalculate_limits(); 3862 3863 // clear all flags 3864 clear_has_aborted(); 3865 _has_timed_out = false; 3866 _draining_satb_buffers = false; 3867 3868 ++_calls; 3869 3870 if (_cm->verbose_low()) { 3871 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 3872 "target = %1.2lfms >>>>>>>>>>", 3873 _task_id, _calls, _time_target_ms); 3874 } 3875 3876 // Set up the bitmap and oop closures. Anything that uses them is 3877 // eventually called from this method, so it is OK to allocate these 3878 // statically. 3879 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 3880 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 3881 set_cm_oop_closure(&cm_oop_closure); 3882 3883 if (_cm->has_overflown()) { 3884 // This can happen if the mark stack overflows during a GC pause 3885 // and this task, after a yield point, restarts. We have to abort 3886 // as we need to get into the overflow protocol which happens 3887 // right at the end of this task. 3888 set_has_aborted(); 3889 } 3890 3891 // First drain any available SATB buffers. After this, we will not 3892 // look at SATB buffers before the next invocation of this method. 3893 // If enough completed SATB buffers are queued up, the regular clock 3894 // will abort this task so that it restarts. 3895 drain_satb_buffers(); 3896 // ...then partially drain the local queue and the global stack 3897 drain_local_queue(true); 3898 drain_global_stack(true); 3899 3900 do { 3901 if (!has_aborted() && _curr_region != NULL) { 3902 // This means that we're already holding on to a region. 3903 assert(_finger != NULL, "if region is not NULL, then the finger " 3904 "should not be NULL either"); 3905 3906 // We might have restarted this task after an evacuation pause 3907 // which might have evacuated the region we're holding on to 3908 // underneath our feet. Let's read its limit again to make sure 3909 // that we do not iterate over a region of the heap that 3910 // contains garbage (update_region_limit() will also move 3911 // _finger to the start of the region if it is found empty). 3912 update_region_limit(); 3913 // We will start from _finger not from the start of the region, 3914 // as we might be restarting this task after aborting half-way 3915 // through scanning this region. In this case, _finger points to 3916 // the address where we last found a marked object. If this is a 3917 // fresh region, _finger points to start(). 3918 MemRegion mr = MemRegion(_finger, _region_limit); 3919 3920 if (_cm->verbose_low()) { 3921 gclog_or_tty->print_cr("[%d] we're scanning part " 3922 "["PTR_FORMAT", "PTR_FORMAT") " 3923 "of region "PTR_FORMAT, 3924 _task_id, _finger, _region_limit, _curr_region); 3925 } 3926 3927 // Let's iterate over the bitmap of the part of the 3928 // region that is left. 3929 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3930 // We successfully completed iterating over the region. Now, 3931 // let's give up the region. 
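  // (giveup_current_region() clears _curr_region, _finger and _region_limit;
  //  the regular_clock_call() right after it re-checks the abort conditions,
  //  since the bitmap walk we just finished may have taken a while.)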
3932 giveup_current_region(); 3933 regular_clock_call(); 3934 } else { 3935 assert(has_aborted(), "currently the only way to do so"); 3936 // The only way to abort the bitmap iteration is to return 3937 // false from the do_bit() method. However, inside the 3938 // do_bit() method we move the _finger to point to the 3939 // object currently being looked at. So, if we bail out, we 3940 // have definitely set _finger to something non-null. 3941 assert(_finger != NULL, "invariant"); 3942 3943 // Region iteration was actually aborted. So now _finger 3944 // points to the address of the object we last scanned. If we 3945 // leave it there, when we restart this task, we will rescan 3946 // the object. It is easy to avoid this. We move the finger by 3947 // enough to point to the next possible object header (the 3948 // bitmap knows by how much we need to move it as it knows its 3949 // granularity). 3950 assert(_finger < _region_limit, "invariant"); 3951 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 3952 // Check if bitmap iteration was aborted while scanning the last object 3953 if (new_finger >= _region_limit) { 3954 giveup_current_region(); 3955 } else { 3956 move_finger_to(new_finger); 3957 } 3958 } 3959 } 3960 // At this point we have either completed iterating over the 3961 // region we were holding on to, or we have aborted. 3962 3963 // We then partially drain the local queue and the global stack. 3964 // (Do we really need this?) 3965 drain_local_queue(true); 3966 drain_global_stack(true); 3967 3968 // Read the note on the claim_region() method on why it might 3969 // return NULL with potentially more regions available for 3970 // claiming and why we have to check out_of_regions() to determine 3971 // whether we're done or not. 3972 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 3973 // We are going to try to claim a new region. We should have 3974 // given up on the previous one. 3975 // Separated the asserts so that we know which one fires. 3976 assert(_curr_region == NULL, "invariant"); 3977 assert(_finger == NULL, "invariant"); 3978 assert(_region_limit == NULL, "invariant"); 3979 if (_cm->verbose_low()) { 3980 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 3981 } 3982 HeapRegion* claimed_region = _cm->claim_region(_task_id); 3983 if (claimed_region != NULL) { 3984 // Yes, we managed to claim one 3985 statsOnly( ++_regions_claimed ); 3986 3987 if (_cm->verbose_low()) { 3988 gclog_or_tty->print_cr("[%d] we successfully claimed " 3989 "region "PTR_FORMAT, 3990 _task_id, claimed_region); 3991 } 3992 3993 setup_for_region(claimed_region); 3994 assert(_curr_region == claimed_region, "invariant"); 3995 } 3996 // It is important to call the regular clock here. It might take 3997 // a while to claim a region if, for example, we hit a large 3998 // block of empty regions. So we need to call the regular clock 3999 // method once round the loop to make sure it's called 4000 // frequently enough. 4001 regular_clock_call(); 4002 } 4003 4004 if (!has_aborted() && _curr_region == NULL) { 4005 assert(_cm->out_of_regions(), 4006 "at this point we should be out of regions"); 4007 } 4008 } while ( _curr_region != NULL && !has_aborted()); 4009 4010 if (!has_aborted()) { 4011 // We cannot check whether the global stack is empty, since other 4012 // tasks might be pushing objects to it concurrently. 
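  // What we can check is that this task has run out of regions to claim;
  // the global stack itself is drained fully further below.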
4013 assert(_cm->out_of_regions(), 4014 "at this point we should be out of regions"); 4015 4016 if (_cm->verbose_low()) { 4017 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); 4018 } 4019 4020 // Try to reduce the number of available SATB buffers so that 4021 // remark has less work to do. 4022 drain_satb_buffers(); 4023 } 4024 4025 // Since we've done everything else, we can now totally drain the 4026 // local queue and global stack. 4027 drain_local_queue(false); 4028 drain_global_stack(false); 4029 4030 // Attempt at work stealing from other task's queues. 4031 if (do_stealing && !has_aborted()) { 4032 // We have not aborted. This means that we have finished all that 4033 // we could. Let's try to do some stealing... 4034 4035 // We cannot check whether the global stack is empty, since other 4036 // tasks might be pushing objects to it concurrently. 4037 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4038 "only way to reach here"); 4039 4040 if (_cm->verbose_low()) { 4041 gclog_or_tty->print_cr("[%d] starting to steal", _task_id); 4042 } 4043 4044 while (!has_aborted()) { 4045 oop obj; 4046 statsOnly( ++_steal_attempts ); 4047 4048 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { 4049 if (_cm->verbose_medium()) { 4050 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", 4051 _task_id, (void*) obj); 4052 } 4053 4054 statsOnly( ++_steals ); 4055 4056 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4057 "any stolen object should be marked"); 4058 scan_object(obj); 4059 4060 // And since we're towards the end, let's totally drain the 4061 // local queue and global stack. 4062 drain_local_queue(false); 4063 drain_global_stack(false); 4064 } else { 4065 break; 4066 } 4067 } 4068 } 4069 4070 // If we are about to wrap up and go into termination, check if we 4071 // should raise the overflow flag. 4072 if (do_termination && !has_aborted()) { 4073 if (_cm->force_overflow()->should_force()) { 4074 _cm->set_has_overflown(); 4075 regular_clock_call(); 4076 } 4077 } 4078 4079 // We still haven't aborted. Now, let's try to get into the 4080 // termination protocol. 4081 if (do_termination && !has_aborted()) { 4082 // We cannot check whether the global stack is empty, since other 4083 // tasks might be concurrently pushing objects on it. 4084 // Separated the asserts so that we know which one fires. 4085 assert(_cm->out_of_regions(), "only way to reach here"); 4086 assert(_task_queue->size() == 0, "only way to reach here"); 4087 4088 if (_cm->verbose_low()) { 4089 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); 4090 } 4091 4092 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4093 // The CMTask class also extends the TerminatorTerminator class, 4094 // hence its should_exit_termination() method will also decide 4095 // whether to exit the termination protocol or not. 4096 bool finished = _cm->terminator()->offer_termination(this); 4097 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4098 _termination_time_ms += 4099 termination_end_time_ms - _termination_start_time_ms; 4100 4101 if (finished) { 4102 // We're all done. 4103 4104 if (_task_id == 0) { 4105 // let's allow task 0 to do this 4106 if (concurrent()) { 4107 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4108 // we need to set this to false before the next 4109 // safepoint. This way we ensure that the marking phase 4110 // doesn't observe any more heap expansions. 
4111 _cm->clear_concurrent_marking_in_progress(); 4112 } 4113 } 4114 4115 // We can now guarantee that the global stack is empty, since 4116 // all other tasks have finished. We separated the guarantees so 4117 // that, if a condition is false, we can immediately find out 4118 // which one. 4119 guarantee(_cm->out_of_regions(), "only way to reach here"); 4120 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4121 guarantee(_task_queue->size() == 0, "only way to reach here"); 4122 guarantee(!_cm->has_overflown(), "only way to reach here"); 4123 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4124 4125 if (_cm->verbose_low()) { 4126 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4127 } 4128 } else { 4129 // Apparently there's more work to do. Let's abort this task. The 4130 // caller will restart it and we can hopefully find more things to do. 4131 4132 if (_cm->verbose_low()) { 4133 gclog_or_tty->print_cr("[%d] apparently there is more work to do", 4134 _task_id); 4135 } 4136 4137 set_has_aborted(); 4138 statsOnly( ++_aborted_termination ); 4139 } 4140 } 4141 4142 // Mainly for debugging purposes to make sure that a pointer to the 4143 // closure which was allocated on the stack in this frame doesn't 4144 // escape it by accident. 4145 set_cm_oop_closure(NULL); 4146 double end_time_ms = os::elapsedVTime() * 1000.0; 4147 double elapsed_time_ms = end_time_ms - _start_time_ms; 4148 // Update the step history. 4149 _step_times_ms.add(elapsed_time_ms); 4150 4151 if (has_aborted()) { 4152 // The task was aborted for some reason. 4153 4154 statsOnly( ++_aborted ); 4155 4156 if (_has_timed_out) { 4157 double diff_ms = elapsed_time_ms - _time_target_ms; 4158 // Keep statistics of how well we did with respect to hitting 4159 // our target only if we actually timed out (if we aborted for 4160 // other reasons, then the results might get skewed). 4161 _marking_step_diffs_ms.add(diff_ms); 4162 } 4163 4164 if (_cm->has_overflown()) { 4165 // This is the interesting one. We aborted because a global 4166 // overflow was raised. This means we have to restart the 4167 // marking phase and start iterating over regions. However, in 4168 // order to do this we have to make sure that all tasks stop 4169 // what they are doing and re-initialise in a safe manner. We 4170 // will achieve this with the use of two barrier sync points. 4171 4172 if (_cm->verbose_low()) { 4173 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4174 } 4175 4176 _cm->enter_first_sync_barrier(_task_id); 4177 // When we exit this sync barrier we know that all tasks have 4178 // stopped doing marking work. So, it's now safe to 4179 // re-initialise our data structures. At the end of this method, 4180 // task 0 will clear the global data structures. 4181 4182 statsOnly( ++_aborted_overflow ); 4183 4184 // We clear the local state of this task... 4185 clear_region_fields(); 4186 4187 // ...and enter the second barrier. 4188 _cm->enter_second_sync_barrier(_task_id); 4189 // At this point everything has been re-initialised and we're 4190 // ready to restart.
4191 } 4192 4193 if (_cm->verbose_low()) { 4194 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4195 "elapsed = %1.2lfms <<<<<<<<<<", 4196 _task_id, _time_target_ms, elapsed_time_ms); 4197 if (_cm->has_aborted()) { 4198 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", 4199 _task_id); 4200 } 4201 } 4202 } else { 4203 if (_cm->verbose_low()) { 4204 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4205 "elapsed = %1.2lfms <<<<<<<<<<", 4206 _task_id, _time_target_ms, elapsed_time_ms); 4207 } 4208 } 4209 4210 _claimed = false; 4211 } 4212 4213 CMTask::CMTask(int task_id, 4214 ConcurrentMark* cm, 4215 size_t* marked_bytes, 4216 BitMap* card_bm, 4217 CMTaskQueue* task_queue, 4218 CMTaskQueueSet* task_queues) 4219 : _g1h(G1CollectedHeap::heap()), 4220 _task_id(task_id), _cm(cm), 4221 _claimed(false), 4222 _nextMarkBitMap(NULL), _hash_seed(17), 4223 _task_queue(task_queue), 4224 _task_queues(task_queues), 4225 _cm_oop_closure(NULL), 4226 _marked_bytes_array(marked_bytes), 4227 _card_bm(card_bm) { 4228 guarantee(task_queue != NULL, "invariant"); 4229 guarantee(task_queues != NULL, "invariant"); 4230 4231 statsOnly( _clock_due_to_scanning = 0; 4232 _clock_due_to_marking = 0 ); 4233 4234 _marking_step_diffs_ms.add(0.5); 4235 } 4236 4237 // These are formatting macros that are used below to ensure 4238 // consistent formatting. The *_H_* versions are used to format the 4239 // header for a particular value and they should be kept consistent 4240 // with the corresponding macro. Also note that most of the macros add 4241 // the necessary white space (as a prefix) which makes them a bit 4242 // easier to compose. 4243 4244 // All the output lines are prefixed with this string to be able to 4245 // identify them easily in a large log file. 4246 #define G1PPRL_LINE_PREFIX "###" 4247 4248 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4249 #ifdef _LP64 4250 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4251 #else // _LP64 4252 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4253 #endif // _LP64 4254 4255 // For per-region info 4256 #define G1PPRL_TYPE_FORMAT " %-4s" 4257 #define G1PPRL_TYPE_H_FORMAT " %4s" 4258 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4259 #define G1PPRL_BYTE_H_FORMAT " %9s" 4260 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4261 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4262 4263 // For summary info 4264 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4265 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4266 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4267 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4268 4269 G1PrintRegionLivenessInfoClosure:: 4270 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4271 : _out(out), 4272 _total_used_bytes(0), _total_capacity_bytes(0), 4273 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4274 _hum_used_bytes(0), _hum_capacity_bytes(0), 4275 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) { 4276 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4277 MemRegion g1_committed = g1h->g1_committed(); 4278 MemRegion g1_reserved = g1h->g1_reserved(); 4279 double now = os::elapsedTime(); 4280 4281 // Print the header of the output. 
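  // (For illustration only; the exact spacing comes from the G1PPRL_* macros
  //  above. The header looks roughly like:
  //    ### PHASE <phase_name> @ <elapsed seconds>
  //    ### HEAP  committed: <from>-<to>  reserved: <from>-<to>  region-size: <bytes>
  //    ###  type     address-range     used  prev-live  next-live  gc-eff
  //  followed below by one '###' line per heap region.)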
4282 _out->cr(); 4283 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 4284 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 4285 G1PPRL_SUM_ADDR_FORMAT("committed") 4286 G1PPRL_SUM_ADDR_FORMAT("reserved") 4287 G1PPRL_SUM_BYTE_FORMAT("region-size"), 4288 g1_committed.start(), g1_committed.end(), 4289 g1_reserved.start(), g1_reserved.end(), 4290 HeapRegion::GrainBytes); 4291 _out->print_cr(G1PPRL_LINE_PREFIX); 4292 _out->print_cr(G1PPRL_LINE_PREFIX 4293 G1PPRL_TYPE_H_FORMAT 4294 G1PPRL_ADDR_BASE_H_FORMAT 4295 G1PPRL_BYTE_H_FORMAT 4296 G1PPRL_BYTE_H_FORMAT 4297 G1PPRL_BYTE_H_FORMAT 4298 G1PPRL_DOUBLE_H_FORMAT, 4299 "type", "address-range", 4300 "used", "prev-live", "next-live", "gc-eff"); 4301 _out->print_cr(G1PPRL_LINE_PREFIX 4302 G1PPRL_TYPE_H_FORMAT 4303 G1PPRL_ADDR_BASE_H_FORMAT 4304 G1PPRL_BYTE_H_FORMAT 4305 G1PPRL_BYTE_H_FORMAT 4306 G1PPRL_BYTE_H_FORMAT 4307 G1PPRL_DOUBLE_H_FORMAT, 4308 "", "", 4309 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)"); 4310 } 4311 4312 // It takes as a parameter a reference to one of the _hum_* fields; it 4313 // deduces the corresponding value for a region in a humongous region 4314 // series (either the region size, or what's left if the _hum_* field 4315 // is < the region size), and updates the _hum_* field accordingly. 4316 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 4317 size_t bytes = 0; 4318 // The > 0 check is to deal with the prev and next live bytes which 4319 // could be 0. 4320 if (*hum_bytes > 0) { 4321 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes); 4322 *hum_bytes -= bytes; 4323 } 4324 return bytes; 4325 } 4326 4327 // It deduces the values for a region in a humongous region series 4328 // from the _hum_* fields and updates those accordingly. It assumes 4329 // that the _hum_* fields have already been set up from the "starts 4330 // humongous" region and we visit the regions in address order. 4331 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4332 size_t* capacity_bytes, 4333 size_t* prev_live_bytes, 4334 size_t* next_live_bytes) { 4335 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4336 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4337 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4338 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4339 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4340 } 4341 4342 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4343 const char* type = ""; 4344 HeapWord* bottom = r->bottom(); 4345 HeapWord* end = r->end(); 4346 size_t capacity_bytes = r->capacity(); 4347 size_t used_bytes = r->used(); 4348 size_t prev_live_bytes = r->live_bytes(); 4349 size_t next_live_bytes = r->next_live_bytes(); 4350 double gc_eff = r->gc_efficiency(); 4351 if (r->used() == 0) { 4352 type = "FREE"; 4353 } else if (r->is_survivor()) { 4354 type = "SURV"; 4355 } else if (r->is_young()) { 4356 type = "EDEN"; 4357 } else if (r->startsHumongous()) { 4358 type = "HUMS"; 4359 4360 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4361 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4362 "they should have been zeroed after the last time we used them"); 4363 // Set up the _hum_* fields.
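  // (They will be handed back one region's worth at a time by the
  //  get_hum_bytes() calls for this region and for the HUMC regions that
  //  follow it in address order.)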
4364 _hum_capacity_bytes = capacity_bytes; 4365 _hum_used_bytes = used_bytes; 4366 _hum_prev_live_bytes = prev_live_bytes; 4367 _hum_next_live_bytes = next_live_bytes; 4368 get_hum_bytes(&used_bytes, &capacity_bytes, 4369 &prev_live_bytes, &next_live_bytes); 4370 end = bottom + HeapRegion::GrainWords; 4371 } else if (r->continuesHumongous()) { 4372 type = "HUMC"; 4373 get_hum_bytes(&used_bytes, &capacity_bytes, 4374 &prev_live_bytes, &next_live_bytes); 4375 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4376 } else { 4377 type = "OLD"; 4378 } 4379 4380 _total_used_bytes += used_bytes; 4381 _total_capacity_bytes += capacity_bytes; 4382 _total_prev_live_bytes += prev_live_bytes; 4383 _total_next_live_bytes += next_live_bytes; 4384 4385 // Print a line for this particular region. 4386 _out->print_cr(G1PPRL_LINE_PREFIX 4387 G1PPRL_TYPE_FORMAT 4388 G1PPRL_ADDR_BASE_FORMAT 4389 G1PPRL_BYTE_FORMAT 4390 G1PPRL_BYTE_FORMAT 4391 G1PPRL_BYTE_FORMAT 4392 G1PPRL_DOUBLE_FORMAT, 4393 type, bottom, end, 4394 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4395 4396 return false; 4397 } 4398 4399 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4400 // Print the footer of the output. 4401 _out->print_cr(G1PPRL_LINE_PREFIX); 4402 _out->print_cr(G1PPRL_LINE_PREFIX 4403 " SUMMARY" 4404 G1PPRL_SUM_MB_FORMAT("capacity") 4405 G1PPRL_SUM_MB_PERC_FORMAT("used") 4406 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4407 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4408 bytes_to_mb(_total_capacity_bytes), 4409 bytes_to_mb(_total_used_bytes), 4410 perc(_total_used_bytes, _total_capacity_bytes), 4411 bytes_to_mb(_total_prev_live_bytes), 4412 perc(_total_prev_live_bytes, _total_capacity_bytes), 4413 bytes_to_mb(_total_next_live_bytes), 4414 perc(_total_next_live_bytes, _total_capacity_bytes)); 4415 _out->cr(); 4416 }