1 /* 2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/symbolTable.hpp" 27 #include "gc_implementation/g1/concurrentMark.inline.hpp" 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp" 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp" 32 #include "gc_implementation/g1/g1Log.hpp" 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp" 34 #include "gc_implementation/g1/g1RemSet.hpp" 35 #include "gc_implementation/g1/heapRegion.inline.hpp" 36 #include "gc_implementation/g1/heapRegionRemSet.hpp" 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp" 38 #include "gc_implementation/shared/vmGCOperations.hpp" 39 #include "memory/genOopClosures.inline.hpp" 40 #include "memory/referencePolicy.hpp" 41 #include "memory/resourceArea.hpp" 42 #include "oops/oop.inline.hpp" 43 #include "runtime/handles.inline.hpp" 44 #include "runtime/java.hpp" 45 #include "services/memTracker.hpp" 46 47 // Concurrent marking bit map wrapper 48 49 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) : 50 _bm((uintptr_t*)NULL,0), 51 _shifter(shifter) { 52 _bmStartWord = (HeapWord*)(rs.base()); 53 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes 54 ReservedSpace brs(ReservedSpace::allocation_align_size_up( 55 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); 56 57 MemTracker::record_virtual_memory_type((address)brs.base(), mtGC); 58 59 guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map"); 60 // For now we'll just commit all of the bit map up front. 61 // Later on we'll try to be more parsimonious with swap. 62 guarantee(_virtual_space.initialize(brs, brs.size()), 63 "couldn't reserve backing store for concurrent marking bit map"); 64 assert(_virtual_space.committed_size() == brs.size(), 65 "didn't reserve backing store for all of concurrent marking bit map?"); 66 _bm.set_map((uintptr_t*)_virtual_space.low()); 67 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= 68 _bmWordSize, "inconsistency in bit map sizing"); 69 _bm.set_size(_bmWordSize >> _shifter); 70 } 71 72 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, 73 HeapWord* limit) const { 74 // First we must round addr *up* to a possible object boundary.
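// Illustrative example: the mark bitmaps below are constructed with
// shifter == MinObjAlignment - 1, so in the common case of
// MinObjAlignment == 1 there is one bitmap bit per HeapWord and this
// align_size_up() rounds addr up to the next HeapWordSize boundary; a
// larger shifter would round up to the correspondingly coarser
// granularity covered by each bit.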
75 addr = (HeapWord*)align_size_up((intptr_t)addr, 76 HeapWordSize << _shifter); 77 size_t addrOffset = heapWordToOffset(addr); 78 if (limit == NULL) { 79 limit = _bmStartWord + _bmWordSize; 80 } 81 size_t limitOffset = heapWordToOffset(limit); 82 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 83 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 84 assert(nextAddr >= addr, "get_next_one postcondition"); 85 assert(nextAddr == limit || isMarked(nextAddr), 86 "get_next_one postcondition"); 87 return nextAddr; 88 } 89 90 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, 91 HeapWord* limit) const { 92 size_t addrOffset = heapWordToOffset(addr); 93 if (limit == NULL) { 94 limit = _bmStartWord + _bmWordSize; 95 } 96 size_t limitOffset = heapWordToOffset(limit); 97 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); 98 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 99 assert(nextAddr >= addr, "get_next_one postcondition"); 100 assert(nextAddr == limit || !isMarked(nextAddr), 101 "get_next_one postcondition"); 102 return nextAddr; 103 } 104 105 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { 106 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); 107 return (int) (diff >> _shifter); 108 } 109 110 #ifndef PRODUCT 111 bool CMBitMapRO::covers(ReservedSpace rs) const { 112 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 113 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize, 114 "size inconsistency"); 115 return _bmStartWord == (HeapWord*)(rs.base()) && 116 _bmWordSize == rs.size()>>LogHeapWordSize; 117 } 118 #endif 119 120 void CMBitMap::clearAll() { 121 _bm.clear(); 122 return; 123 } 124 125 void CMBitMap::markRange(MemRegion mr) { 126 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 127 assert(!mr.is_empty(), "unexpected empty region"); 128 assert((offsetToHeapWord(heapWordToOffset(mr.end())) == 129 ((HeapWord *) mr.end())), 130 "markRange memory region end is not card aligned"); 131 // convert address range into offset range 132 _bm.at_put_range(heapWordToOffset(mr.start()), 133 heapWordToOffset(mr.end()), true); 134 } 135 136 void CMBitMap::clearRange(MemRegion mr) { 137 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 138 assert(!mr.is_empty(), "unexpected empty region"); 139 // convert address range into offset range 140 _bm.at_put_range(heapWordToOffset(mr.start()), 141 heapWordToOffset(mr.end()), false); 142 } 143 144 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, 145 HeapWord* end_addr) { 146 HeapWord* start = getNextMarkedWordAddress(addr); 147 start = MIN2(start, end_addr); 148 HeapWord* end = getNextUnmarkedWordAddress(start); 149 end = MIN2(end, end_addr); 150 assert(start <= end, "Consistency check"); 151 MemRegion mr(start, end); 152 if (!mr.is_empty()) { 153 clearRange(mr); 154 } 155 return mr; 156 } 157 158 CMMarkStack::CMMarkStack(ConcurrentMark* cm) : 159 _base(NULL), _cm(cm) 160 #ifdef ASSERT 161 , _drain_in_progress(false) 162 , _drain_in_progress_yields(false) 163 #endif 164 {} 165 166 void CMMarkStack::allocate(size_t size) { 167 _base = NEW_C_HEAP_ARRAY(oop, size, mtGC); 168 if (_base == NULL) { 169 vm_exit_during_initialization("Failed to allocate CM region mark stack"); 170 } 171 _index = 0; 172 _capacity = (jint) size; 173 _saved_index = -1; 174 NOT_PRODUCT(_max_depth = 0); 175 } 176 177 CMMarkStack::~CMMarkStack() { 178 if (_base != NULL) { 179 FREE_C_HEAP_ARRAY(oop, _base, mtGC); 180 } 181 } 182 183 void 
CMMarkStack::par_push(oop ptr) { 184 while (true) { 185 if (isFull()) { 186 _overflow = true; 187 return; 188 } 189 // Otherwise... 190 jint index = _index; 191 jint next_index = index+1; 192 jint res = Atomic::cmpxchg(next_index, &_index, index); 193 if (res == index) { 194 _base[index] = ptr; 195 // Note that we don't maintain this atomically. We could, but it 196 // doesn't seem necessary. 197 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 198 return; 199 } 200 // Otherwise, we need to try again. 201 } 202 } 203 204 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { 205 while (true) { 206 if (isFull()) { 207 _overflow = true; 208 return; 209 } 210 // Otherwise... 211 jint index = _index; 212 jint next_index = index + n; 213 if (next_index > _capacity) { 214 _overflow = true; 215 return; 216 } 217 jint res = Atomic::cmpxchg(next_index, &_index, index); 218 if (res == index) { 219 for (int i = 0; i < n; i++) { 220 int ind = index + i; 221 assert(ind < _capacity, "By overflow test above."); 222 _base[ind] = ptr_arr[i]; 223 } 224 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 225 return; 226 } 227 // Otherwise, we need to try again. 228 } 229 } 230 231 232 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { 233 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 234 jint start = _index; 235 jint next_index = start + n; 236 if (next_index > _capacity) { 237 _overflow = true; 238 return; 239 } 240 // Otherwise. 241 _index = next_index; 242 for (int i = 0; i < n; i++) { 243 int ind = start + i; 244 assert(ind < _capacity, "By overflow test above."); 245 _base[ind] = ptr_arr[i]; 246 } 247 } 248 249 250 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { 251 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 252 jint index = _index; 253 if (index == 0) { 254 *n = 0; 255 return false; 256 } else { 257 int k = MIN2(max, index); 258 jint new_ind = index - k; 259 for (int j = 0; j < k; j++) { 260 ptr_arr[j] = _base[new_ind + j]; 261 } 262 _index = new_ind; 263 *n = k; 264 return true; 265 } 266 } 267 268 template<class OopClosureClass> 269 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) { 270 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after 271 || SafepointSynchronize::is_at_safepoint(), 272 "Drain recursion must be yield-safe."); 273 bool res = true; 274 debug_only(_drain_in_progress = true); 275 debug_only(_drain_in_progress_yields = yield_after); 276 while (!isEmpty()) { 277 oop newOop = pop(); 278 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop"); 279 assert(newOop->is_oop(), "Expected an oop"); 280 assert(bm == NULL || bm->isMarked((HeapWord*)newOop), 281 "only grey objects on this stack"); 282 newOop->oop_iterate(cl); 283 if (yield_after && _cm->do_yield_check()) { 284 res = false; 285 break; 286 } 287 } 288 debug_only(_drain_in_progress = false); 289 return res; 290 } 291 292 void CMMarkStack::note_start_of_gc() { 293 assert(_saved_index == -1, 294 "note_start_of_gc()/end_of_gc() bracketed incorrectly"); 295 _saved_index = _index; 296 } 297 298 void CMMarkStack::note_end_of_gc() { 299 // This is intentionally a guarantee, instead of an assert. If we 300 // accidentally add something to the mark stack during GC, it 301 // will be a correctness issue so it's better if we crash. We'll 302 // only check this once per GC anyway, so it won't be a performance 303 // issue in any way.
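// Illustrative example: if _saved_index was 100 at note_start_of_gc()
// but _index is 101 here, exactly one oop was pushed onto the mark
// stack during the GC pause - precisely the kind of bug this
// guarantee is meant to catch loudly.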
304 guarantee(_saved_index == _index, 305 err_msg("saved index: %d index: %d", _saved_index, _index)); 306 _saved_index = -1; 307 } 308 309 void CMMarkStack::oops_do(OopClosure* f) { 310 assert(_saved_index == _index, 311 err_msg("saved index: %d index: %d", _saved_index, _index)); 312 for (int i = 0; i < _index; i += 1) { 313 f->do_oop(&_base[i]); 314 } 315 } 316 317 bool ConcurrentMark::not_yet_marked(oop obj) const { 318 return _g1h->is_obj_ill(obj); 319 } 320 321 CMRootRegions::CMRootRegions() : 322 _young_list(NULL), _cm(NULL), _scan_in_progress(false), 323 _should_abort(false), _next_survivor(NULL) { } 324 325 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) { 326 _young_list = g1h->young_list(); 327 _cm = cm; 328 } 329 330 void CMRootRegions::prepare_for_scan() { 331 assert(!scan_in_progress(), "pre-condition"); 332 333 // Currently, only survivors can be root regions. 334 assert(_next_survivor == NULL, "pre-condition"); 335 _next_survivor = _young_list->first_survivor_region(); 336 _scan_in_progress = (_next_survivor != NULL); 337 _should_abort = false; 338 } 339 340 HeapRegion* CMRootRegions::claim_next() { 341 if (_should_abort) { 342 // If someone has set the should_abort flag, we return NULL to 343 // force the caller to bail out of their loop. 344 return NULL; 345 } 346 347 // Currently, only survivors can be root regions. 348 HeapRegion* res = _next_survivor; 349 if (res != NULL) { 350 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 351 // Read it again in case it changed while we were waiting for the lock. 352 res = _next_survivor; 353 if (res != NULL) { 354 if (res == _young_list->last_survivor_region()) { 355 // We just claimed the last survivor so store NULL to indicate 356 // that we're done. 357 _next_survivor = NULL; 358 } else { 359 _next_survivor = res->get_next_young_region(); 360 } 361 } else { 362 // Someone else claimed the last survivor while we were trying 363 // to take the lock so nothing else to do. 364 } 365 } 366 assert(res == NULL || res->is_survivor(), "post-condition"); 367 368 return res; 369 } 370 371 void CMRootRegions::scan_finished() { 372 assert(scan_in_progress(), "pre-condition"); 373 374 // Currently, only survivors can be root regions. 
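// If the scan ran to completion, claim_next() handed out every
// survivor and the claimer of the last one stored NULL into
// _next_survivor (see claim_next() above), which is what the assert
// below verifies; an aborted scan may legitimately leave
// _next_survivor non-NULL.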
375 if (!_should_abort) { 376 assert(_next_survivor == NULL, "we should have claimed all survivors"); 377 } 378 _next_survivor = NULL; 379 380 { 381 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 382 _scan_in_progress = false; 383 RootRegionScan_lock->notify_all(); 384 } 385 } 386 387 bool CMRootRegions::wait_until_scan_finished() { 388 if (!scan_in_progress()) return false; 389 390 { 391 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 392 while (scan_in_progress()) { 393 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 394 } 395 } 396 return true; 397 } 398 399 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away 400 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list 401 #endif // _MSC_VER 402 403 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 404 return MAX2((n_par_threads + 2) / 4, 1U); 405 } 406 407 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) : 408 _markBitMap1(rs, MinObjAlignment - 1), 409 _markBitMap2(rs, MinObjAlignment - 1), 410 411 _parallel_marking_threads(0), 412 _max_parallel_marking_threads(0), 413 _sleep_factor(0.0), 414 _marking_task_overhead(1.0), 415 _cleanup_sleep_factor(0.0), 416 _cleanup_task_overhead(1.0), 417 _cleanup_list("Cleanup List"), 418 _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/), 419 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> 420 CardTableModRefBS::card_shift, 421 false /* in_resource_area*/), 422 423 _prevMarkBitMap(&_markBitMap1), 424 _nextMarkBitMap(&_markBitMap2), 425 426 _markStack(this), 427 // _finger set in set_non_marking_state 428 429 _max_task_num(MAX2((uint)ParallelGCThreads, 1U)), 430 // _active_tasks set in set_non_marking_state 431 // _tasks set inside the constructor 432 _task_queues(new CMTaskQueueSet((int) _max_task_num)), 433 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)), 434 435 _has_overflown(false), 436 _concurrent(false), 437 _has_aborted(false), 438 _restart_for_overflow(false), 439 _concurrent_marking_in_progress(false), 440 441 // _verbose_level set below 442 443 _init_times(), 444 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 445 _cleanup_times(), 446 _total_counting_time(0.0), 447 _total_rs_scrub_time(0.0), 448 449 _parallel_workers(NULL), 450 451 _count_card_bitmaps(NULL), 452 _count_marked_bytes(NULL) { 453 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel; 454 if (verbose_level < no_verbose) { 455 verbose_level = no_verbose; 456 } 457 if (verbose_level > high_verbose) { 458 verbose_level = high_verbose; 459 } 460 _verbose_level = verbose_level; 461 462 if (verbose_low()) { 463 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " 464 "heap end = "PTR_FORMAT, _heap_start, _heap_end); 465 } 466 467 _markStack.allocate(MarkStackSize); 468 469 // Create & start a ConcurrentMark thread. 
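// (The ConcurrentMarkThread constructor presumably records a
// back-pointer to this ConcurrentMark instance; the two asserts below
// sanity-check both directions of that link.)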
470 _cmThread = new ConcurrentMarkThread(this); 471 assert(cmThread() != NULL, "CM Thread should have been created"); 472 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 473 474 _g1h = G1CollectedHeap::heap(); 475 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 476 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); 477 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); 478 479 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 480 satb_qs.set_buffer_size(G1SATBBufferSize); 481 482 _root_regions.init(_g1h, this); 483 484 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC); 485 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC); 486 487 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC); 488 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC); 489 490 BitMap::idx_t card_bm_size = _card_bm.size(); 491 492 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 493 _active_tasks = _max_task_num; 494 for (int i = 0; i < (int) _max_task_num; ++i) { 495 CMTaskQueue* task_queue = new CMTaskQueue(); 496 task_queue->initialize(); 497 _task_queues->register_queue(i, task_queue); 498 499 _count_card_bitmaps[i] = BitMap(card_bm_size, false); 500 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC); 501 502 _tasks[i] = new CMTask(i, this, 503 _count_marked_bytes[i], 504 &_count_card_bitmaps[i], 505 task_queue, _task_queues); 506 507 _accum_task_vtime[i] = 0.0; 508 } 509 510 // Calculate the card number for the bottom of the heap. Used 511 // in biasing indexes into the accounting card bitmaps. 512 _heap_bottom_card_num = 513 intptr_t(uintptr_t(_g1h->reserved_region().start()) >> 514 CardTableModRefBS::card_shift); 515 516 // Clear all the liveness counting data 517 clear_all_count_data(); 518 519 if (ConcGCThreads > ParallelGCThreads) { 520 vm_exit_during_initialization("Can't have more ConcGCThreads " 521 "than ParallelGCThreads."); 522 } 523 if (ParallelGCThreads == 0) { 524 // if we are not running with any parallel GC threads we will not 525 // spawn any marking threads either 526 _parallel_marking_threads = 0; 527 _max_parallel_marking_threads = 0; 528 _sleep_factor = 0.0; 529 _marking_task_overhead = 1.0; 530 } else { 531 if (ConcGCThreads > 0) { 532 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent 533 // if both are set 534 535 _parallel_marking_threads = (uint) ConcGCThreads; 536 _max_parallel_marking_threads = _parallel_marking_threads; 537 _sleep_factor = 0.0; 538 _marking_task_overhead = 1.0; 539 } else if (G1MarkingOverheadPercent > 0) { 540 // we will calculate the number of parallel marking threads 541 // based on a target overhead with respect to the soft real-time 542 // goal 543 544 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 545 double overall_cm_overhead = 546 (double) MaxGCPauseMillis * marking_overhead / 547 (double) GCPauseIntervalMillis; 548 double cpu_ratio = 1.0 / (double) os::processor_count(); 549 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 550 double marking_task_overhead = 551 overall_cm_overhead / marking_thread_num * 552 (double) os::processor_count(); 553 double sleep_factor = 554 (1.0 - marking_task_overhead) / marking_task_overhead; 555 556 _parallel_marking_threads = (uint) marking_thread_num; 557 _max_parallel_marking_threads = _parallel_marking_threads; 558 _sleep_factor = sleep_factor; 559 _marking_task_overhead = marking_task_overhead; 560 } else { 
561 _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads); 562 _max_parallel_marking_threads = _parallel_marking_threads; 563 _sleep_factor = 0.0; 564 _marking_task_overhead = 1.0; 565 } 566 567 if (parallel_marking_threads() > 1) { 568 _cleanup_task_overhead = 1.0; 569 } else { 570 _cleanup_task_overhead = marking_task_overhead(); 571 } 572 _cleanup_sleep_factor = 573 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); 574 575 #if 0 576 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); 577 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); 578 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); 579 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); 580 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); 581 #endif 582 583 guarantee(parallel_marking_threads() > 0, "peace of mind"); 584 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads", 585 _max_parallel_marking_threads, false, true); 586 if (_parallel_workers == NULL) { 587 vm_exit_during_initialization("Failed necessary allocation."); 588 } else { 589 _parallel_workers->initialize_workers(); 590 } 591 } 592 593 // so that the call below can read a sensible value 594 _heap_start = (HeapWord*) rs.base(); 595 set_non_marking_state(); 596 } 597 598 void ConcurrentMark::update_g1_committed(bool force) { 599 // If concurrent marking is not in progress, then we do not need to 600 // update _heap_end. 601 if (!concurrent_marking_in_progress() && !force) return; 602 603 MemRegion committed = _g1h->g1_committed(); 604 assert(committed.start() == _heap_start, "start shouldn't change"); 605 HeapWord* new_end = committed.end(); 606 if (new_end > _heap_end) { 607 // The heap has been expanded. 608 609 _heap_end = new_end; 610 } 611 // Notice that the heap can also shrink. However, this only happens 612 // during a Full GC (at least currently) and the entire marking 613 // phase will bail out and the task will not be restarted. So, let's 614 // do nothing. 615 } 616 617 void ConcurrentMark::reset() { 618 // Starting values for these two. This should be called in a STW 619 // phase. CM will be notified of any future g1_committed expansions 620 // at the end of evacuation pauses, when tasks are 621 // inactive. 622 MemRegion committed = _g1h->g1_committed(); 623 _heap_start = committed.start(); 624 _heap_end = committed.end(); 625 626 // Separated the asserts so that we know which one fires. 627 assert(_heap_start != NULL, "heap bounds should look ok"); 628 assert(_heap_end != NULL, "heap bounds should look ok"); 629 assert(_heap_start < _heap_end, "heap bounds should look ok"); 630 631 // reset all the marking data structures and any necessary flags 632 clear_marking_state(); 633 634 if (verbose_low()) { 635 gclog_or_tty->print_cr("[global] resetting"); 636 } 637 638 // We do reset all of them, since different phases will use 639 // a different number of active threads. So, it's easiest to have all 640 // of them ready.
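// For example, the concurrent phase typically runs with
// parallel_marking_threads() active tasks while the remark pause may
// run with the full STW worker count, so set_phase() is handed a
// different active_tasks value for each phase; resetting every task
// here keeps whichever subset becomes active in a usable state.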
641 for (int i = 0; i < (int) _max_task_num; ++i) { 642 _tasks[i]->reset(_nextMarkBitMap); 643 } 644 645 // we need this to make sure that the flag is on during the evac 646 // pause with initial mark piggy-backed 647 set_concurrent_marking_in_progress(); 648 } 649 650 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) { 651 assert(active_tasks <= _max_task_num, "we should not have more"); 652 653 _active_tasks = active_tasks; 654 // Need to update the three data structures below according to the 655 // number of active threads for this phase. 656 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 657 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 658 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 659 660 _concurrent = concurrent; 661 // We propagate this to all tasks, not just the active ones. 662 for (int i = 0; i < (int) _max_task_num; ++i) 663 _tasks[i]->set_concurrent(concurrent); 664 665 if (concurrent) { 666 set_concurrent_marking_in_progress(); 667 } else { 668 // We currently assume that the concurrent flag has been set to 669 // false before we start remark. At this point we should also be 670 // in a STW phase. 671 assert(!concurrent_marking_in_progress(), "invariant"); 672 assert(_finger == _heap_end, "only way to get here"); 673 update_g1_committed(true); 674 } 675 } 676 677 void ConcurrentMark::set_non_marking_state() { 678 // We set the global marking state to some default values when we're 679 // not doing marking. 680 clear_marking_state(); 681 _active_tasks = 0; 682 clear_concurrent_marking_in_progress(); 683 } 684 685 ConcurrentMark::~ConcurrentMark() { 686 // The ConcurrentMark instance is never freed. 687 ShouldNotReachHere(); 688 } 689 690 void ConcurrentMark::clearNextBitmap() { 691 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 692 G1CollectorPolicy* g1p = g1h->g1_policy(); 693 694 // Make sure that the concurrent mark thread looks to still be in 695 // the current cycle. 696 guarantee(cmThread()->during_cycle(), "invariant"); 697 698 // We are finishing up the current cycle by clearing the next 699 // marking bitmap and getting it ready for the next cycle. During 700 // this time no other cycle can start. So, let's make sure that this 701 // is the case. 702 guarantee(!g1h->mark_in_progress(), "invariant"); 703 704 // clear the mark bitmap (no grey objects to start with). 705 // We need to do this in chunks and offer to yield in between 706 // each chunk. 707 HeapWord* start = _nextMarkBitMap->startWord(); 708 HeapWord* end = _nextMarkBitMap->endWord(); 709 HeapWord* cur = start; 710 size_t chunkSize = M; 711 while (cur < end) { 712 HeapWord* next = cur + chunkSize; 713 if (next > end) { 714 next = end; 715 } 716 MemRegion mr(cur,next); 717 _nextMarkBitMap->clearRange(mr); 718 cur = next; 719 do_yield_check(); 720 721 // Repeat the asserts from above. We'll do them as asserts here to 722 // minimize their overhead on the product. However, we'll have 723 // them as guarantees at the beginning / end of the bitmap 724 // clearing to get some checking in the product. 725 assert(cmThread()->during_cycle(), "invariant"); 726 assert(!g1h->mark_in_progress(), "invariant"); 727 } 728 729 // Clear the liveness counting data 730 clear_all_count_data(); 731 732 // Repeat the asserts from above. 
733 guarantee(cmThread()->during_cycle(), "invariant"); 734 guarantee(!g1h->mark_in_progress(), "invariant"); 735 } 736 737 class NoteStartOfMarkHRClosure: public HeapRegionClosure { 738 public: 739 bool doHeapRegion(HeapRegion* r) { 740 if (!r->continuesHumongous()) { 741 r->note_start_of_marking(); 742 } 743 return false; 744 } 745 }; 746 747 void ConcurrentMark::checkpointRootsInitialPre() { 748 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 749 G1CollectorPolicy* g1p = g1h->g1_policy(); 750 751 _has_aborted = false; 752 753 #ifndef PRODUCT 754 if (G1PrintReachableAtInitialMark) { 755 print_reachable("at-cycle-start", 756 VerifyOption_G1UsePrevMarking, true /* all */); 757 } 758 #endif 759 760 // Initialise marking structures. This has to be done in a STW phase. 761 reset(); 762 763 // For each region note start of marking. 764 NoteStartOfMarkHRClosure startcl; 765 g1h->heap_region_iterate(&startcl); 766 } 767 768 769 void ConcurrentMark::checkpointRootsInitialPost() { 770 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 771 772 // If we force an overflow during remark, the remark operation will 773 // actually abort and we'll restart concurrent marking. If we always 774 // force an overflow during remark we'll never actually complete the 775 // marking phase. So, we initialize this here, at the start of the 776 // cycle, so that the remaining overflow number will decrease at 777 // every remark and we'll eventually not need to cause one. 778 force_overflow_stw()->init(); 779 780 // Start Concurrent Marking weak-reference discovery. 781 ReferenceProcessor* rp = g1h->ref_processor_cm(); 782 // enable ("weak") refs discovery 783 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); 784 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 785 786 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 787 // This is the start of the marking cycle; we expect all 788 // threads to have SATB queues with active set to false. 789 satb_mq_set.set_active_all_threads(true, /* new active value */ 790 false /* expected_active */); 791 792 _root_regions.prepare_for_scan(); 793 794 // update_g1_committed() will be called at the end of an evac pause 795 // when marking is on. So, it's also called at the end of the 796 // initial-mark pause to update the heap end, if the heap expands 797 // during it. No need to call it here. 798 } 799 800 /* 801 * Notice that in the next two methods, we actually leave the STS 802 * during the barrier sync and join it immediately afterwards. If we 803 * do not do this, the following deadlock can occur: one thread could 804 * be in the barrier sync code, waiting for the other thread to also 805 * sync up, whereas another one could be trying to yield, while also 806 * waiting for the other threads to sync up too. 807 * 808 * Note, however, that this code is also used during remark and in 809 * this case we should not attempt to leave / enter the STS, otherwise 810 * we'll either hit an assert (debug / fastdebug) or deadlock 811 * (product). So we should only leave / enter the STS if we are 812 * operating concurrently. 813 * 814 * Because the thread that does the sync barrier has left the STS, it 815 * is possible that a Full GC or an evacuation pause 816 * occurs while it waits. This is actually safe, since entering the sync 817 * barrier is one of the last things do_marking_step() does, and it 818 * doesn't manipulate any data structures afterwards.
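 *
 * One concrete way the deadlock can unfold: marking thread A enters
 * the barrier without leaving the STS; a safepoint is then requested,
 * so marking thread B blocks in its yield check until the safepoint
 * completes; but the safepoint cannot start while A is inside the STS,
 * and A cannot leave the barrier until B arrives at it. Leaving the
 * STS before the sync breaks the cycle.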
819 */ 820 821 void ConcurrentMark::enter_first_sync_barrier(int task_num) { 822 if (verbose_low()) { 823 gclog_or_tty->print_cr("[%d] entering first barrier", task_num); 824 } 825 826 if (concurrent()) { 827 ConcurrentGCThread::stsLeave(); 828 } 829 _first_overflow_barrier_sync.enter(); 830 if (concurrent()) { 831 ConcurrentGCThread::stsJoin(); 832 } 833 // at this point everyone should have synced up and not be doing any 834 // more work 835 836 if (verbose_low()) { 837 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); 838 } 839 840 // let task 0 do this 841 if (task_num == 0) { 842 // task 0 is responsible for clearing the global data structures 843 // We should be here because of an overflow. During STW we should 844 // not clear the overflow flag since we rely on it being true when 845 // we exit this method to abort the pause and restart concurrent 846 // marking. 847 clear_marking_state(concurrent() /* clear_overflow */); 848 force_overflow()->update(); 849 850 if (G1Log::fine()) { 851 gclog_or_tty->date_stamp(PrintGCDateStamps); 852 gclog_or_tty->stamp(PrintGCTimeStamps); 853 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); 854 } 855 } 856 857 // after this, each task should reset its own data structures and 858 // then go into the second barrier 859 } 860 861 void ConcurrentMark::enter_second_sync_barrier(int task_num) { 862 if (verbose_low()) { 863 gclog_or_tty->print_cr("[%d] entering second barrier", task_num); 864 } 865 866 if (concurrent()) { 867 ConcurrentGCThread::stsLeave(); 868 } 869 _second_overflow_barrier_sync.enter(); 870 if (concurrent()) { 871 ConcurrentGCThread::stsJoin(); 872 } 873 // at this point everything should be re-initialised and ready to go 874 875 if (verbose_low()) { 876 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); 877 } 878 } 879 880 #ifndef PRODUCT 881 void ForceOverflowSettings::init() { 882 _num_remaining = G1ConcMarkForceOverflow; 883 _force = false; 884 update(); 885 } 886 887 void ForceOverflowSettings::update() { 888 if (_num_remaining > 0) { 889 _num_remaining -= 1; 890 _force = true; 891 } else { 892 _force = false; 893 } 894 } 895 896 bool ForceOverflowSettings::should_force() { 897 if (_force) { 898 _force = false; 899 return true; 900 } else { 901 return false; 902 } 903 } 904 #endif // !PRODUCT 905 906 class CMConcurrentMarkingTask: public AbstractGangTask { 907 private: 908 ConcurrentMark* _cm; 909 ConcurrentMarkThread* _cmt; 910 911 public: 912 void work(uint worker_id) { 913 assert(Thread::current()->is_ConcurrentGC_thread(), 914 "this should only be done by a conc GC thread"); 915 ResourceMark rm; 916 917 double start_vtime = os::elapsedVTime(); 918 919 ConcurrentGCThread::stsJoin(); 920 921 assert(worker_id < _cm->active_tasks(), "invariant"); 922 CMTask* the_task = _cm->task(worker_id); 923 the_task->record_start_time(); 924 if (!_cm->has_aborted()) { 925 do { 926 double start_vtime_sec = os::elapsedVTime(); 927 double start_time_sec = os::elapsedTime(); 928 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 929 930 the_task->do_marking_step(mark_step_duration_ms, 931 true /* do_stealing */, 932 true /* do_termination */); 933 934 double end_time_sec = os::elapsedTime(); 935 double end_vtime_sec = os::elapsedVTime(); 936 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 937 double elapsed_time_sec = end_time_sec - start_time_sec; 938 _cm->clear_has_overflown(); 939 940 bool ret = _cm->do_yield_check(worker_id); 941 942 jlong sleep_time_ms; 943 if
(!_cm->has_aborted() && the_task->has_aborted()) { 944 sleep_time_ms = 945 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 946 ConcurrentGCThread::stsLeave(); 947 os::sleep(Thread::current(), sleep_time_ms, false); 948 ConcurrentGCThread::stsJoin(); 949 } 950 double end_time2_sec = os::elapsedTime(); 951 double elapsed_time2_sec = end_time2_sec - start_time_sec; 952 953 #if 0 954 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " 955 "overhead %1.4lf", 956 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, 957 the_task->conc_overhead(os::elapsedTime()) * 8.0); 958 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", 959 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 960 #endif 961 } while (!_cm->has_aborted() && the_task->has_aborted()); 962 } 963 the_task->record_end_time(); 964 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 965 966 ConcurrentGCThread::stsLeave(); 967 968 double end_vtime = os::elapsedVTime(); 969 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 970 } 971 972 CMConcurrentMarkingTask(ConcurrentMark* cm, 973 ConcurrentMarkThread* cmt) : 974 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 975 976 ~CMConcurrentMarkingTask() { } 977 }; 978 979 // Calculates the number of active workers for a concurrent 980 // phase. 981 uint ConcurrentMark::calc_parallel_marking_threads() { 982 if (G1CollectedHeap::use_parallel_gc_threads()) { 983 uint n_conc_workers = 0; 984 if (!UseDynamicNumberOfGCThreads || 985 (!FLAG_IS_DEFAULT(ConcGCThreads) && 986 !ForceDynamicNumberOfGCThreads)) { 987 n_conc_workers = max_parallel_marking_threads(); 988 } else { 989 n_conc_workers = 990 AdaptiveSizePolicy::calc_default_active_workers( 991 max_parallel_marking_threads(), 992 1, /* Minimum workers */ 993 parallel_marking_threads(), 994 Threads::number_of_non_daemon_threads()); 995 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 996 // that scaling has already gone into "_max_parallel_marking_threads". 997 } 998 assert(n_conc_workers > 0, "Always need at least 1"); 999 return n_conc_workers; 1000 } 1001 // If we are not running with any parallel GC threads we will not 1002 // have spawned any marking threads either. Hence the number of 1003 // concurrent workers should be 0. 1004 return 0; 1005 } 1006 1007 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) { 1008 // Currently, only survivors can be root regions. 
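// Survivor regions are filled during the initial-mark pause itself,
// so their next-top-at-mark-start is still at bottom(); the assert
// below relies on this, and it is why the loop can simply visit every
// object in [bottom, top).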
1009 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 1010 G1RootRegionScanClosure cl(_g1h, this, worker_id); 1011 1012 const uintx interval = PrefetchScanIntervalInBytes; 1013 HeapWord* curr = hr->bottom(); 1014 const HeapWord* end = hr->top(); 1015 while (curr < end) { 1016 Prefetch::read(curr, interval); 1017 oop obj = oop(curr); 1018 int size = obj->oop_iterate(&cl); 1019 assert(size == obj->size(), "sanity"); 1020 curr += size; 1021 } 1022 } 1023 1024 class CMRootRegionScanTask : public AbstractGangTask { 1025 private: 1026 ConcurrentMark* _cm; 1027 1028 public: 1029 CMRootRegionScanTask(ConcurrentMark* cm) : 1030 AbstractGangTask("Root Region Scan"), _cm(cm) { } 1031 1032 void work(uint worker_id) { 1033 assert(Thread::current()->is_ConcurrentGC_thread(), 1034 "this should only be done by a conc GC thread"); 1035 1036 CMRootRegions* root_regions = _cm->root_regions(); 1037 HeapRegion* hr = root_regions->claim_next(); 1038 while (hr != NULL) { 1039 _cm->scanRootRegion(hr, worker_id); 1040 hr = root_regions->claim_next(); 1041 } 1042 } 1043 }; 1044 1045 void ConcurrentMark::scanRootRegions() { 1046 // scan_in_progress() will have been set to true only if there was 1047 // at least one root region to scan. So, if it's false, we 1048 // should not attempt to do any further work. 1049 if (root_regions()->scan_in_progress()) { 1050 _parallel_marking_threads = calc_parallel_marking_threads(); 1051 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1052 "Maximum number of marking threads exceeded"); 1053 uint active_workers = MAX2(1U, parallel_marking_threads()); 1054 1055 CMRootRegionScanTask task(this); 1056 if (parallel_marking_threads() > 0) { 1057 _parallel_workers->set_active_workers((int) active_workers); 1058 _parallel_workers->run_task(&task); 1059 } else { 1060 task.work(0); 1061 } 1062 1063 // It's possible that has_aborted() is true here without actually 1064 // aborting the survivor scan earlier. This is OK as it's 1065 // mainly used for sanity checking. 1066 root_regions()->scan_finished(); 1067 } 1068 } 1069 1070 void ConcurrentMark::markFromRoots() { 1071 // we might be tempted to assert that: 1072 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1073 // "inconsistent argument?"); 1074 // However that wouldn't be right, because it's possible that 1075 // a safepoint is indeed in progress as a younger generation 1076 // stop-the-world GC happens even as we mark in this generation. 1077 1078 _restart_for_overflow = false; 1079 force_overflow_conc()->init(); 1080 1081 // _g1h has _n_par_threads 1082 _parallel_marking_threads = calc_parallel_marking_threads(); 1083 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1084 "Maximum number of marking threads exceeded"); 1085 1086 uint active_workers = MAX2(1U, parallel_marking_threads()); 1087 1088 // Parallel task terminator is set in "set_phase()" 1089 set_phase(active_workers, true /* concurrent */); 1090 1091 CMConcurrentMarkingTask markingTask(this, cmThread()); 1092 if (parallel_marking_threads() > 0) { 1093 _parallel_workers->set_active_workers((int)active_workers); 1094 // Don't set _n_par_threads because it affects MT in process_strong_roots() 1095 // and the decisions on that MT processing are made elsewhere.
1096 assert(_parallel_workers->active_workers() > 0, "Should have been set"); 1097 _parallel_workers->run_task(&markingTask); 1098 } else { 1099 markingTask.work(0); 1100 } 1101 print_stats(); 1102 } 1103 1104 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1105 // world is stopped at this checkpoint 1106 assert(SafepointSynchronize::is_at_safepoint(), 1107 "world should be stopped"); 1108 1109 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1110 1111 // If a full collection has happened, we shouldn't do this. 1112 if (has_aborted()) { 1113 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1114 return; 1115 } 1116 1117 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1118 1119 if (VerifyDuringGC) { 1120 HandleMark hm; // handle scope 1121 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1122 Universe::heap()->prepare_for_verify(); 1123 Universe::verify(/* silent */ false, 1124 /* option */ VerifyOption_G1UsePrevMarking); 1125 } 1126 1127 G1CollectorPolicy* g1p = g1h->g1_policy(); 1128 g1p->record_concurrent_mark_remark_start(); 1129 1130 double start = os::elapsedTime(); 1131 1132 checkpointRootsFinalWork(); 1133 1134 double mark_work_end = os::elapsedTime(); 1135 1136 weakRefsWork(clear_all_soft_refs); 1137 1138 if (has_overflown()) { 1139 // Oops. We overflowed. Restart concurrent marking. 1140 _restart_for_overflow = true; 1141 // Clear the flag. We do not need it any more. 1142 clear_has_overflown(); 1143 if (G1TraceMarkStackOverflow) { 1144 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1145 } 1146 } else { 1147 // Aggregate the per-task counting data that we have accumulated 1148 // while marking. 1149 aggregate_count_data(); 1150 1151 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1152 // We're done with marking. 1153 // This is the end of the marking cycle; we expect all 1154 // threads to have SATB queues with active set to true. 1155 satb_mq_set.set_active_all_threads(false, /* new active value */ 1156 true /* expected_active */); 1157 1158 if (VerifyDuringGC) { 1159 HandleMark hm; // handle scope 1160 gclog_or_tty->print(" VerifyDuringGC:(after)"); 1161 Universe::heap()->prepare_for_verify(); 1162 Universe::verify(/* silent */ false, 1163 /* option */ VerifyOption_G1UseNextMarking); 1164 } 1165 assert(!restart_for_overflow(), "sanity"); 1166 } 1167 1168 // Reset the marking state if marking completed 1169 if (!restart_for_overflow()) { 1170 set_non_marking_state(); 1171 } 1172 1173 #if VERIFY_OBJS_PROCESSED 1174 _scan_obj_cl.objs_processed = 0; 1175 ThreadLocalObjQueue::objs_enqueued = 0; 1176 #endif 1177 1178 // Statistics 1179 double now = os::elapsedTime(); 1180 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1181 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1182 _remark_times.add((now - start) * 1000.0); 1183 1184 g1p->record_concurrent_mark_remark_end(); 1185 } 1186 1187 // Base class of the closures that finalize and verify the 1188 // liveness counting data. 1189 class CMCountDataClosureBase: public HeapRegionClosure { 1190 protected: 1191 ConcurrentMark* _cm; 1192 BitMap* _region_bm; 1193 BitMap* _card_bm; 1194 1195 void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) { 1196 assert(start_idx <= last_idx, "sanity"); 1197 1198 // Set the inclusive bit range [start_idx, last_idx]. 1199 // For small ranges (up to 8 cards) use a simple loop; otherwise 1200 // use par_at_put_range.
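// (Illustrative arithmetic: with the usual 512-byte cards an 8-card
// range spans 4 KB, so typical objects take the cheap loop below and
// only large objects fall through to par_at_put_range().)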
1201 if ((last_idx - start_idx) < 8) { 1202 for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) { 1203 _card_bm->par_set_bit(i); 1204 } 1205 } else { 1206 assert(last_idx < _card_bm->size(), "sanity"); 1207 // Note BitMap::par_at_put_range() is exclusive. 1208 BitMap::idx_t max_idx = MIN2(last_idx+1, _card_bm->size()); 1209 _card_bm->par_at_put_range(start_idx, max_idx, true); 1210 } 1211 } 1212 1213 // It takes a region that's not empty (i.e., it has at least one 1214 // live object in it) and sets its corresponding bit on the region 1215 // bitmap to 1. If the region is "starts humongous" it will also set 1216 // to 1 the bits on the region bitmap that correspond to its 1217 // associated "continues humongous" regions. 1218 void set_bit_for_region(HeapRegion* hr) { 1219 assert(!hr->continuesHumongous(), "should have filtered those out"); 1220 1221 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1222 if (!hr->startsHumongous()) { 1223 // Normal (non-humongous) case: just set the bit. 1224 _region_bm->par_at_put(index, true); 1225 } else { 1226 // Starts humongous case: calculate how many regions are part of 1227 // this humongous region and then set the bit range. 1228 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index(); 1229 _region_bm->par_at_put_range(index, end_index, true); 1230 } 1231 } 1232 1233 public: 1234 CMCountDataClosureBase(ConcurrentMark *cm, 1235 BitMap* region_bm, BitMap* card_bm): 1236 _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { } 1237 }; 1238 1239 // Closure that calculates the # live objects per region. Used 1240 // for verification purposes during the cleanup pause. 1241 class CalcLiveObjectsClosure: public CMCountDataClosureBase { 1242 CMBitMapRO* _bm; 1243 size_t _region_marked_bytes; 1244 1245 public: 1246 CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm, 1247 BitMap* region_bm, BitMap* card_bm) : 1248 CMCountDataClosureBase(cm, region_bm, card_bm), 1249 _bm(bm), _region_marked_bytes(0) { } 1250 1251 bool doHeapRegion(HeapRegion* hr) { 1252 1253 if (hr->continuesHumongous()) { 1254 // We will ignore these here and process them when their 1255 // associated "starts humongous" region is processed (see 1256 // set_bit_for_heap_region()). Note that we cannot rely on their 1257 // associated "starts humongous" region to have their bit set to 1258 // 1 since, due to the region chunking in the parallel region 1259 // iteration, a "continues humongous" region might be visited 1260 // before its associated "starts humongous". 1261 return false; 1262 } 1263 1264 HeapWord* nextTop = hr->next_top_at_mark_start(); 1265 HeapWord* start = hr->bottom(); 1266 1267 assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(), 1268 err_msg("Preconditions not met - " 1269 "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT, 1270 start, nextTop, hr->end())); 1271 1272 // Find the first marked object at or after "start". 1273 start = _bm->getNextMarkedWordAddress(start, nextTop); 1274 1275 size_t marked_bytes = 0; 1276 1277 while (start < nextTop) { 1278 oop obj = oop(start); 1279 int obj_sz = obj->size(); 1280 HeapWord* obj_last = start + obj_sz - 1; 1281 1282 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 1283 BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last); 1284 1285 // Set the bits in the card BM for this object (inclusive). 1286 set_card_bitmap_range(start_idx, last_idx); 1287 1288 // Add the size of this object to the number of marked bytes.
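// obj_sz is measured in HeapWords, so the multiplication below
// converts the object's size into bytes.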
1289 marked_bytes += (size_t)obj_sz * HeapWordSize; 1290 1291 // Find the next marked object after this one. 1292 start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop); 1293 } 1294 1295 // Mark the allocated-since-marking portion... 1296 HeapWord* top = hr->top(); 1297 if (nextTop < top) { 1298 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop); 1299 BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1); 1300 1301 set_card_bitmap_range(start_idx, last_idx); 1302 1303 // This definitely means the region has live objects. 1304 set_bit_for_region(hr); 1305 } 1306 1307 // Update the live region bitmap. 1308 if (marked_bytes > 0) { 1309 set_bit_for_region(hr); 1310 } 1311 1312 // Set the marked bytes for the current region so that 1313 // it can be queried by a calling verification routine. 1314 _region_marked_bytes = marked_bytes; 1315 1316 return false; 1317 } 1318 1319 size_t region_marked_bytes() const { return _region_marked_bytes; } 1320 }; 1321 1322 // Heap region closure used for verifying the counting data 1323 // that was accumulated concurrently and aggregated during 1324 // the remark pause. This closure is applied to the heap 1325 // regions during the STW cleanup pause. 1326 1327 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { 1328 ConcurrentMark* _cm; 1329 CalcLiveObjectsClosure _calc_cl; 1330 BitMap* _region_bm; // Region BM to be verified 1331 BitMap* _card_bm; // Card BM to be verified 1332 bool _verbose; // verbose output? 1333 1334 BitMap* _exp_region_bm; // Expected Region BM values 1335 BitMap* _exp_card_bm; // Expected card BM values 1336 1337 int _failures; 1338 1339 public: 1340 VerifyLiveObjectDataHRClosure(ConcurrentMark* cm, 1341 BitMap* region_bm, 1342 BitMap* card_bm, 1343 BitMap* exp_region_bm, 1344 BitMap* exp_card_bm, 1345 bool verbose) : 1346 _cm(cm), 1347 _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm), 1348 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose), 1349 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), 1350 _failures(0) { } 1351 1352 int failures() const { return _failures; } 1353 1354 bool doHeapRegion(HeapRegion* hr) { 1355 if (hr->continuesHumongous()) { 1356 // We will ignore these here and process them when their 1357 // associated "starts humongous" region is processed (see 1358 // set_bit_for_heap_region()). Note that we cannot rely on their 1359 // associated "starts humongous" region to have their bit set to 1360 // 1 since, due to the region chunking in the parallel region 1361 // iteration, a "continues humongous" region might be visited 1362 // before its associated "starts humongous". 1363 return false; 1364 } 1365 1366 int failures = 0; 1367 1368 // Call the CalcLiveObjectsClosure to walk the marking bitmap for 1369 // this region and set the corresponding bits in the expected region 1370 // and card bitmaps. 1371 bool res = _calc_cl.doHeapRegion(hr); 1372 assert(res == false, "should be continuing"); 1373 1374 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL), 1375 Mutex::_no_safepoint_check_flag); 1376 1377 // Verify the marked bytes for this region. 1378 size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); 1379 size_t act_marked_bytes = hr->next_marked_bytes(); 1380 1381 // We're not OK if expected marked bytes > actual marked bytes. It means 1382 // we have missed accounting some objects during the actual marking.
1383 if (exp_marked_bytes > act_marked_bytes) { 1384 if (_verbose) { 1385 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: " 1386 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT, 1387 hr->hrs_index(), exp_marked_bytes, act_marked_bytes); 1388 } 1389 failures += 1; 1390 } 1391 1392 // Verify the bit, for this region, in the actual and expected 1393 // (which was just calculated) region bit maps. 1394 // We're not OK if the bit in the calculated expected region 1395 // bitmap is set and the bit in the actual region bitmap is not. 1396 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1397 1398 bool expected = _exp_region_bm->at(index); 1399 bool actual = _region_bm->at(index); 1400 if (expected && !actual) { 1401 if (_verbose) { 1402 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1403 "expected: %s, actual: %s", 1404 hr->hrs_index(), 1405 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1406 } 1407 failures += 1; 1408 } 1409 1410 // Verify that the card bit maps for the cards spanned by the current 1411 // region match. We have an error if we have a set bit in the expected 1412 // bit map and the corresponding bit in the actual bitmap is not set. 1413 1414 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1415 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1416 1417 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1418 expected = _exp_card_bm->at(i); 1419 actual = _card_bm->at(i); 1420 1421 if (expected && !actual) { 1422 if (_verbose) { 1423 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1424 "expected: %s, actual: %s", 1425 hr->hrs_index(), i, 1426 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1427 } 1428 failures += 1; 1429 } 1430 } 1431 1432 if (failures > 0 && _verbose) { 1433 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1434 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1435 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(), 1436 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1437 } 1438 1439 _failures += failures; 1440 1441 // We could stop iteration over the heap when we 1442 // find the first violating region by returning true. 1443 return false; 1444 } 1445 }; 1446 1447 1448 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1449 protected: 1450 G1CollectedHeap* _g1h; 1451 ConcurrentMark* _cm; 1452 BitMap* _actual_region_bm; 1453 BitMap* _actual_card_bm; 1454 1455 uint _n_workers; 1456 1457 BitMap* _expected_region_bm; 1458 BitMap* _expected_card_bm; 1459 1460 int _failures; 1461 bool _verbose; 1462 1463 public: 1464 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1465 BitMap* region_bm, BitMap* card_bm, 1466 BitMap* expected_region_bm, BitMap* expected_card_bm) 1467 : AbstractGangTask("G1 verify final counting"), 1468 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1469 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1470 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1471 _failures(0), _verbose(false), 1472 _n_workers(0) { 1473 assert(VerifyDuringGC, "don't call this otherwise"); 1474 1475 // Use the value already set as the number of active threads 1476 // in the call to run_task(). 
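// (For the pattern, see ConcurrentMark::cleanup() below:
// g1h->set_par_threads() is called just before
// workers()->run_task(&g1_par_count_task), which is what makes
// active_workers() meaningful at this point.)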
1477 if (G1CollectedHeap::use_parallel_gc_threads()) { 1478 assert( _g1h->workers()->active_workers() > 0, 1479 "Should have been previously set"); 1480 _n_workers = _g1h->workers()->active_workers(); 1481 } else { 1482 _n_workers = 1; 1483 } 1484 1485 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1486 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1487 1488 _verbose = _cm->verbose_medium(); 1489 } 1490 1491 void work(uint worker_id) { 1492 assert(worker_id < _n_workers, "invariant"); 1493 1494 VerifyLiveObjectDataHRClosure verify_cl(_cm, 1495 _actual_region_bm, _actual_card_bm, 1496 _expected_region_bm, 1497 _expected_card_bm, 1498 _verbose); 1499 1500 if (G1CollectedHeap::use_parallel_gc_threads()) { 1501 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1502 worker_id, 1503 _n_workers, 1504 HeapRegion::VerifyCountClaimValue); 1505 } else { 1506 _g1h->heap_region_iterate(&verify_cl); 1507 } 1508 1509 Atomic::add(verify_cl.failures(), &_failures); 1510 } 1511 1512 int failures() const { return _failures; } 1513 }; 1514 1515 // Closure that finalizes the liveness counting data. 1516 // Used during the cleanup pause. 1517 // Sets the bits corresponding to the interval [NTAMS, top] 1518 // (which contains the implicitly live objects) in the 1519 // card liveness bitmap. Also sets the bit for each region 1520 // containing live data in the region liveness bitmap. 1521 1522 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1523 public: 1524 FinalCountDataUpdateClosure(ConcurrentMark* cm, 1525 BitMap* region_bm, 1526 BitMap* card_bm) : 1527 CMCountDataClosureBase(cm, region_bm, card_bm) { } 1528 1529 bool doHeapRegion(HeapRegion* hr) { 1530 1531 if (hr->continuesHumongous()) { 1532 // We will ignore these here and process them when their 1533 // associated "starts humongous" region is processed (see 1534 // set_bit_for_heap_region()). Note that we cannot rely on their 1535 // associated "starts humongous" region to have their bit set to 1536 // 1 since, due to the region chunking in the parallel region 1537 // iteration, a "continues humongous" region might be visited 1538 // before its associated "starts humongous". 1539 return false; 1540 } 1541 1542 HeapWord* ntams = hr->next_top_at_mark_start(); 1543 HeapWord* top = hr->top(); 1544 1545 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1546 1547 // Mark the allocated-since-marking portion... 1548 if (ntams < top) { 1549 // This definitely means the region has live objects. 1550 set_bit_for_region(hr); 1551 } 1552 1553 // Now set the bits for [ntams, top] 1554 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1555 // set_card_bitmap_range() expects the last_idx to be within 1556 // the range of the bit map (see assertion in set_card_bitmap_range()), 1557 // so limit it to that range with this application of MIN2. 1558 BitMap::idx_t last_idx = MIN2(_cm->card_bitmap_index_for(top), 1559 _card_bm->size()-1); 1560 if (start_idx < _card_bm->size()) { 1561 set_card_bitmap_range(start_idx, last_idx); 1562 } else { 1563 // To reach here start_idx must be beyond the end of 1564 // the bit map and last_idx must have been limited by 1565 // the MIN2().
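// Worked example: if ntams == top and both lie at the very end of the
// space covered by the card bitmap, then card_bitmap_index_for(ntams)
// equals _card_bm->size() while last_idx was clamped to size() - 1,
// giving start_idx == last_idx + 1 exactly as asserted below.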
1566 assert(start_idx == last_idx + 1, 1567 err_msg("Not beyond end start_idx " SIZE_FORMAT " last_idx " 1568 SIZE_FORMAT, start_idx, last_idx)); 1569 } 1570 1571 // Set the bit for the region if it contains live data 1572 if (hr->next_marked_bytes() > 0) { 1573 set_bit_for_region(hr); 1574 } 1575 1576 return false; 1577 } 1578 }; 1579 1580 class G1ParFinalCountTask: public AbstractGangTask { 1581 protected: 1582 G1CollectedHeap* _g1h; 1583 ConcurrentMark* _cm; 1584 BitMap* _actual_region_bm; 1585 BitMap* _actual_card_bm; 1586 1587 uint _n_workers; 1588 1589 public: 1590 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1591 : AbstractGangTask("G1 final counting"), 1592 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1593 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1594 _n_workers(0) { 1595 // Use the value already set as the number of active threads 1596 // in the call to run_task(). 1597 if (G1CollectedHeap::use_parallel_gc_threads()) { 1598 assert( _g1h->workers()->active_workers() > 0, 1599 "Should have been previously set"); 1600 _n_workers = _g1h->workers()->active_workers(); 1601 } else { 1602 _n_workers = 1; 1603 } 1604 } 1605 1606 void work(uint worker_id) { 1607 assert(worker_id < _n_workers, "invariant"); 1608 1609 FinalCountDataUpdateClosure final_update_cl(_cm, 1610 _actual_region_bm, 1611 _actual_card_bm); 1612 1613 if (G1CollectedHeap::use_parallel_gc_threads()) { 1614 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1615 worker_id, 1616 _n_workers, 1617 HeapRegion::FinalCountClaimValue); 1618 } else { 1619 _g1h->heap_region_iterate(&final_update_cl); 1620 } 1621 } 1622 }; 1623 1624 class G1ParNoteEndTask; 1625 1626 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1627 G1CollectedHeap* _g1; 1628 int _worker_num; 1629 size_t _max_live_bytes; 1630 uint _regions_claimed; 1631 size_t _freed_bytes; 1632 FreeRegionList* _local_cleanup_list; 1633 OldRegionSet* _old_proxy_set; 1634 HumongousRegionSet* _humongous_proxy_set; 1635 HRRSCleanupTask* _hrrs_cleanup_task; 1636 double _claimed_region_time; 1637 double _max_region_time; 1638 1639 public: 1640 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1641 int worker_num, 1642 FreeRegionList* local_cleanup_list, 1643 OldRegionSet* old_proxy_set, 1644 HumongousRegionSet* humongous_proxy_set, 1645 HRRSCleanupTask* hrrs_cleanup_task) : 1646 _g1(g1), _worker_num(worker_num), 1647 _max_live_bytes(0), _regions_claimed(0), 1648 _freed_bytes(0), 1649 _claimed_region_time(0.0), _max_region_time(0.0), 1650 _local_cleanup_list(local_cleanup_list), 1651 _old_proxy_set(old_proxy_set), 1652 _humongous_proxy_set(humongous_proxy_set), 1653 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1654 1655 size_t freed_bytes() { return _freed_bytes; } 1656 1657 bool doHeapRegion(HeapRegion *hr) { 1658 if (hr->continuesHumongous()) { 1659 return false; 1660 } 1661 // We use a claim value of zero here because all regions 1662 // were claimed with value 1 in the FinalCount task. 
1663 _g1->reset_gc_time_stamps(hr); 1664 double start = os::elapsedTime(); 1665 _regions_claimed++; 1666 hr->note_end_of_marking(); 1667 _max_live_bytes += hr->max_live_bytes(); 1668 _g1->free_region_if_empty(hr, 1669 &_freed_bytes, 1670 _local_cleanup_list, 1671 _old_proxy_set, 1672 _humongous_proxy_set, 1673 _hrrs_cleanup_task, 1674 true /* par */); 1675 double region_time = (os::elapsedTime() - start); 1676 _claimed_region_time += region_time; 1677 if (region_time > _max_region_time) { 1678 _max_region_time = region_time; 1679 } 1680 return false; 1681 } 1682 1683 size_t max_live_bytes() { return _max_live_bytes; } 1684 uint regions_claimed() { return _regions_claimed; } 1685 double claimed_region_time_sec() { return _claimed_region_time; } 1686 double max_region_time_sec() { return _max_region_time; } 1687 }; 1688 1689 class G1ParNoteEndTask: public AbstractGangTask { 1690 friend class G1NoteEndOfConcMarkClosure; 1691 1692 protected: 1693 G1CollectedHeap* _g1h; 1694 size_t _max_live_bytes; 1695 size_t _freed_bytes; 1696 FreeRegionList* _cleanup_list; 1697 1698 public: 1699 G1ParNoteEndTask(G1CollectedHeap* g1h, 1700 FreeRegionList* cleanup_list) : 1701 AbstractGangTask("G1 note end"), _g1h(g1h), 1702 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1703 1704 void work(uint worker_id) { 1705 double start = os::elapsedTime(); 1706 FreeRegionList local_cleanup_list("Local Cleanup List"); 1707 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1708 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1709 HRRSCleanupTask hrrs_cleanup_task; 1710 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1711 &old_proxy_set, 1712 &humongous_proxy_set, 1713 &hrrs_cleanup_task); 1714 if (G1CollectedHeap::use_parallel_gc_threads()) { 1715 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1716 _g1h->workers()->active_workers(), 1717 HeapRegion::NoteEndClaimValue); 1718 } else { 1719 _g1h->heap_region_iterate(&g1_note_end); 1720 } 1721 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1722 1723 // Now update the lists 1724 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1725 NULL /* free_list */, 1726 &old_proxy_set, 1727 &humongous_proxy_set, 1728 true /* par */); 1729 { 1730 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1731 _max_live_bytes += g1_note_end.max_live_bytes(); 1732 _freed_bytes += g1_note_end.freed_bytes(); 1733 1734 // If we iterate over the global cleanup list at the end of 1735 // cleanup to do this printing we will not guarantee to only 1736 // generate output for the newly-reclaimed regions (the list 1737 // might not be empty at the beginning of cleanup; we might 1738 // still be working on its previous contents). So we do the 1739 // printing here, before we append the new regions to the global 1740 // cleanup list. 
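// Note the pattern here: each worker frees regions into its private
// local_cleanup_list and proxy sets with no synchronization; only this
// short critical section (we are holding ParGCRareEvent_lock) touches the
// shared totals and the global cleanup list, keeping contention low.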
1741 1742 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1743 if (hr_printer->is_active()) { 1744 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1745 while (iter.more_available()) { 1746 HeapRegion* hr = iter.get_next(); 1747 hr_printer->cleanup(hr); 1748 } 1749 } 1750 1751 _cleanup_list->add_as_tail(&local_cleanup_list); 1752 assert(local_cleanup_list.is_empty(), "post-condition"); 1753 1754 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1755 } 1756 } 1757 size_t max_live_bytes() { return _max_live_bytes; } 1758 size_t freed_bytes() { return _freed_bytes; } 1759 }; 1760 1761 class G1ParScrubRemSetTask: public AbstractGangTask { 1762 protected: 1763 G1RemSet* _g1rs; 1764 BitMap* _region_bm; 1765 BitMap* _card_bm; 1766 public: 1767 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1768 BitMap* region_bm, BitMap* card_bm) : 1769 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1770 _region_bm(region_bm), _card_bm(card_bm) { } 1771 1772 void work(uint worker_id) { 1773 if (G1CollectedHeap::use_parallel_gc_threads()) { 1774 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1775 HeapRegion::ScrubRemSetClaimValue); 1776 } else { 1777 _g1rs->scrub(_region_bm, _card_bm); 1778 } 1779 } 1780 1781 }; 1782 1783 void ConcurrentMark::cleanup() { 1784 // world is stopped at this checkpoint 1785 assert(SafepointSynchronize::is_at_safepoint(), 1786 "world should be stopped"); 1787 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1788 1789 // If a full collection has happened, we shouldn't do this. 1790 if (has_aborted()) { 1791 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1792 return; 1793 } 1794 1795 HRSPhaseSetter x(HRSPhaseCleanup); 1796 g1h->verify_region_sets_optional(); 1797 1798 if (VerifyDuringGC) { 1799 HandleMark hm; // handle scope 1800 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1801 Universe::heap()->prepare_for_verify(); 1802 Universe::verify(/* silent */ false, 1803 /* option */ VerifyOption_G1UsePrevMarking); 1804 } 1805 1806 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1807 g1p->record_concurrent_mark_cleanup_start(); 1808 1809 double start = os::elapsedTime(); 1810 1811 HeapRegionRemSet::reset_for_cleanup_tasks(); 1812 1813 uint n_workers; 1814 1815 // Do counting once more with the world stopped for good measure. 1816 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1817 1818 if (G1CollectedHeap::use_parallel_gc_threads()) { 1819 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1820 "sanity check"); 1821 1822 g1h->set_par_threads(); 1823 n_workers = g1h->n_par_threads(); 1824 assert(g1h->n_par_threads() == n_workers, 1825 "Should not have been reset"); 1826 g1h->workers()->run_task(&g1_par_count_task); 1827 // Done with the parallel phase so reset to 0. 1828 g1h->set_par_threads(0); 1829 1830 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1831 "sanity check"); 1832 } else { 1833 n_workers = 1; 1834 g1_par_count_task.work(0); 1835 } 1836 1837 if (VerifyDuringGC) { 1838 // Verify that the counting data accumulated during marking matches 1839 // that calculated by walking the marking bitmap. 
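// The verification recomputes region and card liveness from scratch into
// the "expected" bitmaps and compares them against the "actual" bitmaps
// filled in by the final counting task above; each mismatch counts as a
// failure and trips the guarantee further down.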
1840 1841 // Bitmaps to hold expected values 1842 BitMap expected_region_bm(_region_bm.size(), false); 1843 BitMap expected_card_bm(_card_bm.size(), false); 1844 1845 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1846 &_region_bm, 1847 &_card_bm, 1848 &expected_region_bm, 1849 &expected_card_bm); 1850 1851 if (G1CollectedHeap::use_parallel_gc_threads()) { 1852 g1h->set_par_threads((int)n_workers); 1853 g1h->workers()->run_task(&g1_par_verify_task); 1854 // Done with the parallel phase so reset to 0. 1855 g1h->set_par_threads(0); 1856 1857 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 1858 "sanity check"); 1859 } else { 1860 g1_par_verify_task.work(0); 1861 } 1862 1863 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1864 } 1865 1866 size_t start_used_bytes = g1h->used(); 1867 g1h->set_marking_complete(); 1868 1869 double count_end = os::elapsedTime(); 1870 double this_final_counting_time = (count_end - start); 1871 _total_counting_time += this_final_counting_time; 1872 1873 if (G1PrintRegionLivenessInfo) { 1874 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1875 _g1h->heap_region_iterate(&cl); 1876 } 1877 1878 // Install newly created mark bitmap as "prev". 1879 swapMarkBitMaps(); 1880 1881 g1h->reset_gc_time_stamp(); 1882 1883 // Note end of marking in all heap regions. 1884 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 1885 if (G1CollectedHeap::use_parallel_gc_threads()) { 1886 g1h->set_par_threads((int)n_workers); 1887 g1h->workers()->run_task(&g1_par_note_end_task); 1888 g1h->set_par_threads(0); 1889 1890 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 1891 "sanity check"); 1892 } else { 1893 g1_par_note_end_task.work(0); 1894 } 1895 g1h->check_gc_time_stamps(); 1896 1897 if (!cleanup_list_is_empty()) { 1898 // The cleanup list is not empty, so we'll have to process it 1899 // concurrently. Notify anyone else that might be wanting free 1900 // regions that there will be more free regions coming soon. 1901 g1h->set_free_regions_coming(); 1902 } 1903 1904 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end() call 1905 // below, since scrubbing affects the metric by which we sort the heap regions. 1906 if (G1ScrubRemSets) { 1907 double rs_scrub_start = os::elapsedTime(); 1908 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 1909 if (G1CollectedHeap::use_parallel_gc_threads()) { 1910 g1h->set_par_threads((int)n_workers); 1911 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1912 g1h->set_par_threads(0); 1913 1914 assert(g1h->check_heap_region_claim_values( 1915 HeapRegion::ScrubRemSetClaimValue), 1916 "sanity check"); 1917 } else { 1918 g1_par_scrub_rs_task.work(0); 1919 } 1920 1921 double rs_scrub_end = os::elapsedTime(); 1922 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1923 _total_rs_scrub_time += this_rs_scrub_time; 1924 } 1925 1926 // this will also free any regions totally full of garbage objects, 1927 // and sort the regions. 1928 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 1929 1930 // Statistics. 1931 double end = os::elapsedTime(); 1932 _cleanup_times.add((end - start) * 1000.0); 1933 1934 if (G1Log::fine()) { 1935 g1h->print_size_transition(gclog_or_tty, 1936 start_used_bytes, 1937 g1h->used(), 1938 g1h->capacity()); 1939 } 1940 1941 // Cleanup will have freed any regions completely full of garbage. 1942 // Update the soft reference policy with the new heap occupancy.
1943 Universe::update_heap_info_at_gc(); 1944 1945 // We need to make this a "collection" so any collection pause that 1946 // races with it goes around and waits for completeCleanup to finish. 1947 g1h->increment_total_collections(); 1948 1949 // We reclaimed old regions so we should calculate the sizes to make 1950 // sure we update the old gen/space data. 1951 g1h->g1mm()->update_sizes(); 1952 1953 if (VerifyDuringGC) { 1954 HandleMark hm; // handle scope 1955 gclog_or_tty->print(" VerifyDuringGC:(after)"); 1956 Universe::heap()->prepare_for_verify(); 1957 Universe::verify(/* silent */ false, 1958 /* option */ VerifyOption_G1UsePrevMarking); 1959 } 1960 1961 g1h->verify_region_sets_optional(); 1962 } 1963 1964 void ConcurrentMark::completeCleanup() { 1965 if (has_aborted()) return; 1966 1967 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1968 1969 _cleanup_list.verify_optional(); 1970 FreeRegionList tmp_free_list("Tmp Free List"); 1971 1972 if (G1ConcRegionFreeingVerbose) { 1973 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1974 "cleanup list has %u entries", 1975 _cleanup_list.length()); 1976 } 1977 1978 // No one else should be accessing the _cleanup_list at this point, 1979 // so it's not necessary to take any locks. 1980 while (!_cleanup_list.is_empty()) { 1981 HeapRegion* hr = _cleanup_list.remove_head(); 1982 assert(hr != NULL, "the list was not empty"); 1983 hr->par_clear(); 1984 tmp_free_list.add_as_tail(hr); 1985 1986 // Instead of adding one region at a time to the secondary_free_list, 1987 // we accumulate them in the local list and move them a few at a 1988 // time. This also cuts down on the number of notify_all() calls 1989 // we do during this process. We'll also append the local list when 1990 // _cleanup_list is empty (which means we just removed the last 1991 // region from the _cleanup_list).
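// Illustration of the batching described above (value hypothetical): with
// G1SecondaryFreeListAppendLength == 5, a worker takes SecondaryFreeList_lock
// and calls notify_all() once per 5 cleared regions rather than once per
// region, amortizing both the lock acquisition and the wakeups.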
1992 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1993 _cleanup_list.is_empty()) { 1994 if (G1ConcRegionFreeingVerbose) { 1995 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1996 "appending %u entries to the secondary_free_list, " 1997 "cleanup list still has %u entries", 1998 tmp_free_list.length(), 1999 _cleanup_list.length()); 2000 } 2001 2002 { 2003 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2004 g1h->secondary_free_list_add_as_tail(&tmp_free_list); 2005 SecondaryFreeList_lock->notify_all(); 2006 } 2007 2008 if (G1StressConcRegionFreeing) { 2009 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2010 os::sleep(Thread::current(), (jlong) 1, false); 2011 } 2012 } 2013 } 2014 } 2015 assert(tmp_free_list.is_empty(), "post-condition"); 2016 } 2017 2018 // Support closures for reference processing in G1 2019 2020 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2021 HeapWord* addr = (HeapWord*)obj; 2022 return addr != NULL && 2023 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2024 } 2025 2026 class G1CMKeepAliveClosure: public ExtendedOopClosure { 2027 G1CollectedHeap* _g1; 2028 ConcurrentMark* _cm; 2029 public: 2030 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) : 2031 _g1(g1), _cm(cm) { 2032 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id"); 2033 } 2034 2035 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2036 virtual void do_oop( oop* p) { do_oop_work(p); } 2037 2038 template <class T> void do_oop_work(T* p) { 2039 oop obj = oopDesc::load_decode_heap_oop(p); 2040 HeapWord* addr = (HeapWord*)obj; 2041 2042 if (_cm->verbose_high()) { 2043 gclog_or_tty->print_cr("\t[0] we're looking at location " 2044 "*"PTR_FORMAT" = "PTR_FORMAT, 2045 p, (void*) obj); 2046 } 2047 2048 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { 2049 _cm->mark_and_count(obj); 2050 _cm->mark_stack_push(obj); 2051 } 2052 } 2053 }; 2054 2055 class G1CMDrainMarkingStackClosure: public VoidClosure { 2056 ConcurrentMark* _cm; 2057 CMMarkStack* _markStack; 2058 G1CMKeepAliveClosure* _oopClosure; 2059 public: 2060 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack, 2061 G1CMKeepAliveClosure* oopClosure) : 2062 _cm(cm), 2063 _markStack(markStack), 2064 _oopClosure(oopClosure) { } 2065 2066 void do_void() { 2067 _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false); 2068 } 2069 }; 2070 2071 // 'Keep Alive' closure used by parallel reference processing. 2072 // An instance of this closure is used in the parallel reference processing 2073 // code rather than an instance of G1CMKeepAliveClosure. We could have used 2074 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are 2075 // placed onto discovered ref lists only once, so we can mark and push with no 2076 // need to check whether the object has already been marked. Using the 2077 // G1CMKeepAliveClosure would mean, however, having all the worker threads 2078 // operating on the global mark stack. This means that an individual 2079 // worker would be doing lock-free pushes while it processes its own 2080 // discovered ref list followed by a drain call. If the discovered ref lists 2081 // are unbalanced then this could cause interference with the other 2082 // workers. Using a CMTask (and its embedded local data structures) 2083 // avoids that potential interference.
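// In short: each worker spills to the global mark stack only in bulk via
// its CMTask-local queue, instead of having every keep-alive push contend
// on the global stack directly.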
2084 class G1CMParKeepAliveAndDrainClosure: public OopClosure { 2085 ConcurrentMark* _cm; 2086 CMTask* _task; 2087 int _ref_counter_limit; 2088 int _ref_counter; 2089 public: 2090 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) : 2091 _cm(cm), _task(task), 2092 _ref_counter_limit(G1RefProcDrainInterval) { 2093 assert(_ref_counter_limit > 0, "sanity"); 2094 _ref_counter = _ref_counter_limit; 2095 } 2096 2097 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2098 virtual void do_oop( oop* p) { do_oop_work(p); } 2099 2100 template <class T> void do_oop_work(T* p) { 2101 if (!_cm->has_overflown()) { 2102 oop obj = oopDesc::load_decode_heap_oop(p); 2103 if (_cm->verbose_high()) { 2104 gclog_or_tty->print_cr("\t[%d] we're looking at location " 2105 "*"PTR_FORMAT" = "PTR_FORMAT, 2106 _task->task_id(), p, (void*) obj); 2107 } 2108 2109 _task->deal_with_reference(obj); 2110 _ref_counter--; 2111 2112 if (_ref_counter == 0) { 2113 // We have dealt with _ref_counter_limit references, pushing them and objects 2114 // reachable from them onto the local stack (and possibly the global stack). 2115 // Call do_marking_step() to process these entries. We call the routine in a 2116 // loop, which we'll exit if there's nothing more to do (i.e. we're done 2117 // with the entries that we've pushed as a result of the deal_with_reference 2118 // calls above) or we overflow. 2119 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 2120 // while there may still be some work to do. (See the comment at the 2121 // beginning of CMTask::do_marking_step() for those conditions - one of which 2122 // is reaching the specified time target.) It is only when 2123 // CMTask::do_marking_step() returns without setting the has_aborted() flag 2124 // that the marking has completed. 2125 do { 2126 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2127 _task->do_marking_step(mark_step_duration_ms, 2128 false /* do_stealing */, 2129 false /* do_termination */); 2130 } while (_task->has_aborted() && !_cm->has_overflown()); 2131 _ref_counter = _ref_counter_limit; 2132 } 2133 } else { 2134 if (_cm->verbose_high()) { 2135 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id()); 2136 } 2137 } 2138 } 2139 }; 2140 2141 class G1CMParDrainMarkingStackClosure: public VoidClosure { 2142 ConcurrentMark* _cm; 2143 CMTask* _task; 2144 public: 2145 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) : 2146 _cm(cm), _task(task) { } 2147 2148 void do_void() { 2149 do { 2150 if (_cm->verbose_high()) { 2151 gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step", 2152 _task->task_id()); 2153 } 2154 2155 // We call CMTask::do_marking_step() to completely drain the local and 2156 // global marking stacks. The routine is called in a loop, which we'll 2157 // exit if there's nothing more to do (i.e. we've completely drained the 2158 // entries that were pushed as a result of applying the 2159 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref 2160 // lists above) or we overflow the global marking stack. 2161 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 2162 // while there may still be some work to do. (See the comment at the 2163 // beginning of CMTask::do_marking_step() for those conditions - one of which 2164 // is reaching the specified time target.) It is only when 2165 // CMTask::do_marking_step() returns without setting the has_aborted() flag 2166 // that the marking has completed.
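// Put differently: has_aborted() without a global-stack overflow means the
// step merely ran out of budget (e.g. hit its time target) and should simply
// be retried, while an overflow means marking must be restarted globally, so
// the loop below gives up and lets the caller deal with it.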
2167 2168 _task->do_marking_step(1000000000.0 /* something very large */, 2169 true /* do_stealing */, 2170 true /* do_termination */); 2171 } while (_task->has_aborted() && !_cm->has_overflown()); 2172 } 2173 }; 2174 2175 // Implementation of AbstractRefProcTaskExecutor for parallel 2176 // reference processing at the end of G1 concurrent marking 2177 2178 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2179 private: 2180 G1CollectedHeap* _g1h; 2181 ConcurrentMark* _cm; 2182 WorkGang* _workers; 2183 int _active_workers; 2184 2185 public: 2186 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2187 ConcurrentMark* cm, 2188 WorkGang* workers, 2189 int n_workers) : 2190 _g1h(g1h), _cm(cm), 2191 _workers(workers), _active_workers(n_workers) { } 2192 2193 // Executes the given task using concurrent marking worker threads. 2194 virtual void execute(ProcessTask& task); 2195 virtual void execute(EnqueueTask& task); 2196 }; 2197 2198 class G1CMRefProcTaskProxy: public AbstractGangTask { 2199 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2200 ProcessTask& _proc_task; 2201 G1CollectedHeap* _g1h; 2202 ConcurrentMark* _cm; 2203 2204 public: 2205 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2206 G1CollectedHeap* g1h, 2207 ConcurrentMark* cm) : 2208 AbstractGangTask("Process reference objects in parallel"), 2209 _proc_task(proc_task), _g1h(g1h), _cm(cm) { } 2210 2211 virtual void work(uint worker_id) { 2212 CMTask* marking_task = _cm->task(worker_id); 2213 G1CMIsAliveClosure g1_is_alive(_g1h); 2214 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task); 2215 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2216 2217 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2218 } 2219 }; 2220 2221 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2222 assert(_workers != NULL, "Need parallel worker threads."); 2223 2224 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2225 2226 // We need to reset the phase for each task execution so that 2227 // the termination protocol of CMTask::do_marking_step works. 2228 _cm->set_phase(_active_workers, false /* concurrent */); 2229 _g1h->set_par_threads(_active_workers); 2230 _workers->run_task(&proc_task_proxy); 2231 _g1h->set_par_threads(0); 2232 } 2233 2234 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2235 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2236 EnqueueTask& _enq_task; 2237 2238 public: 2239 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2240 AbstractGangTask("Enqueue reference objects in parallel"), 2241 _enq_task(enq_task) { } 2242 2243 virtual void work(uint worker_id) { 2244 _enq_task.work(worker_id); 2245 } 2246 }; 2247 2248 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2249 assert(_workers != NULL, "Need parallel worker threads."); 2250 2251 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2252 2253 _g1h->set_par_threads(_active_workers); 2254 _workers->run_task(&enq_task_proxy); 2255 _g1h->set_par_threads(0); 2256 } 2257 2258 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2259 ResourceMark rm; 2260 HandleMark hm; 2261 2262 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2263 2264 // Is alive closure. 2265 G1CMIsAliveClosure g1_is_alive(g1h); 2266 2267 // Inner scope to exclude the cleaning of the string and symbol 2268 // tables from the displayed time. 
2269 { 2270 if (G1Log::finer()) { 2271 gclog_or_tty->put(' '); 2272 } 2273 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty); 2274 2275 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2276 2277 // See the comment in G1CollectedHeap::ref_processing_init() 2278 // about how reference processing currently works in G1. 2279 2280 // Process weak references. 2281 rp->setup_policy(clear_all_soft_refs); 2282 assert(_markStack.isEmpty(), "mark stack should be empty"); 2283 2284 G1CMKeepAliveClosure g1_keep_alive(g1h, this); 2285 G1CMDrainMarkingStackClosure 2286 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive); 2287 2288 // We use the work gang from the G1CollectedHeap and we utilize all 2289 // the worker threads. 2290 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U; 2291 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U); 2292 2293 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2294 g1h->workers(), active_workers); 2295 2296 if (rp->processing_is_mt()) { 2297 // Set the degree of MT here. If the discovery is done MT, there 2298 // may have been a different number of threads doing the discovery 2299 // and a different number of discovered lists may have Ref objects. 2300 // That is OK as long as the Reference lists are balanced (see 2301 // balance_all_queues() and balance_queues()). 2302 rp->set_active_mt_degree(active_workers); 2303 2304 rp->process_discovered_references(&g1_is_alive, 2305 &g1_keep_alive, 2306 &g1_drain_mark_stack, 2307 &par_task_executor); 2308 2309 // The work routines of the parallel keep_alive and drain_marking_stack 2310 // will set the has_overflown flag if we overflow the global marking 2311 // stack. 2312 } else { 2313 rp->process_discovered_references(&g1_is_alive, 2314 &g1_keep_alive, 2315 &g1_drain_mark_stack, 2316 NULL); 2317 } 2318 2319 assert(_markStack.overflow() || _markStack.isEmpty(), 2320 "mark stack should be empty (unless it overflowed)"); 2321 if (_markStack.overflow()) { 2322 // Should have been done already when we tried to push an 2323 // entry onto the global mark stack. But let's do it again. 2324 set_has_overflown(); 2325 } 2326 2327 if (rp->processing_is_mt()) { 2328 assert(rp->num_q() == active_workers, "why not"); 2329 rp->enqueue_discovered_references(&par_task_executor); 2330 } else { 2331 rp->enqueue_discovered_references(); 2332 } 2333 2334 rp->verify_no_references_recorded(); 2335 assert(!rp->discovery_enabled(), "Post condition"); 2336 } 2337 2338 // Now clean up stale oops in StringTable 2339 StringTable::unlink(&g1_is_alive); 2340 // Clean up unreferenced symbols in symbol table. 2341 SymbolTable::unlink(); 2342 } 2343 2344 void ConcurrentMark::swapMarkBitMaps() { 2345 CMBitMapRO* temp = _prevMarkBitMap; 2346 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2347 _nextMarkBitMap = (CMBitMap*) temp; 2348 } 2349 2350 class CMRemarkTask: public AbstractGangTask { 2351 private: 2352 ConcurrentMark *_cm; 2353 2354 public: 2355 void work(uint worker_id) { 2356 // Since all available tasks are actually started, we should 2357 // only proceed if we're supposed to be active. 2358 if (worker_id < _cm->active_tasks()) { 2359 CMTask* task = _cm->task(worker_id); 2360 task->record_start_time(); 2361 do { 2362 task->do_marking_step(1000000000.0 /* something very large */, 2363 true /* do_stealing */, 2364 true /* do_termination */); 2365 } while (task->has_aborted() && !_cm->has_overflown()); 2366 // If we overflow, then we do not want to restart.
We instead 2367 // want to abort remark and do concurrent marking again. 2368 task->record_end_time(); 2369 } 2370 } 2371 2372 CMRemarkTask(ConcurrentMark* cm, int active_workers) : 2373 AbstractGangTask("Par Remark"), _cm(cm) { 2374 _cm->terminator()->reset_for_reuse(active_workers); 2375 } 2376 }; 2377 2378 void ConcurrentMark::checkpointRootsFinalWork() { 2379 ResourceMark rm; 2380 HandleMark hm; 2381 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2382 2383 g1h->ensure_parsability(false); 2384 2385 if (G1CollectedHeap::use_parallel_gc_threads()) { 2386 G1CollectedHeap::StrongRootsScope srs(g1h); 2387 // this is remark, so we'll use up all active threads 2388 uint active_workers = g1h->workers()->active_workers(); 2389 if (active_workers == 0) { 2390 assert(active_workers > 0, "Should have been set earlier"); 2391 active_workers = (uint) ParallelGCThreads; 2392 g1h->workers()->set_active_workers(active_workers); 2393 } 2394 set_phase(active_workers, false /* concurrent */); 2395 // Leave _parallel_marking_threads at its 2396 // value originally calculated in the ConcurrentMark 2397 // constructor and pass values of the active workers 2398 // through the gang in the task. 2399 2400 CMRemarkTask remarkTask(this, active_workers); 2401 g1h->set_par_threads(active_workers); 2402 g1h->workers()->run_task(&remarkTask); 2403 g1h->set_par_threads(0); 2404 } else { 2405 G1CollectedHeap::StrongRootsScope srs(g1h); 2406 // this is remark, but we're running it serially with a single thread 2407 uint active_workers = 1; 2408 set_phase(active_workers, false /* concurrent */); 2409 2410 CMRemarkTask remarkTask(this, active_workers); 2411 // We will start all available threads, even if we decide that the 2412 // active_workers will be fewer. The extra ones will just bail out 2413 // immediately.
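// In the serial case the work gang is bypassed entirely: we simply run the
// task body inline as worker 0.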
2414 remarkTask.work(0); 2415 } 2416 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2417 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2418 2419 print_stats(); 2420 2421 #if VERIFY_OBJS_PROCESSED 2422 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2423 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2424 _scan_obj_cl.objs_processed, 2425 ThreadLocalObjQueue::objs_enqueued); 2426 guarantee(_scan_obj_cl.objs_processed == 2427 ThreadLocalObjQueue::objs_enqueued, 2428 "Different number of objs processed and enqueued."); 2429 } 2430 #endif 2431 } 2432 2433 #ifndef PRODUCT 2434 2435 class PrintReachableOopClosure: public OopClosure { 2436 private: 2437 G1CollectedHeap* _g1h; 2438 outputStream* _out; 2439 VerifyOption _vo; 2440 bool _all; 2441 2442 public: 2443 PrintReachableOopClosure(outputStream* out, 2444 VerifyOption vo, 2445 bool all) : 2446 _g1h(G1CollectedHeap::heap()), 2447 _out(out), _vo(vo), _all(all) { } 2448 2449 void do_oop(narrowOop* p) { do_oop_work(p); } 2450 void do_oop( oop* p) { do_oop_work(p); } 2451 2452 template <class T> void do_oop_work(T* p) { 2453 oop obj = oopDesc::load_decode_heap_oop(p); 2454 const char* str = NULL; 2455 const char* str2 = ""; 2456 2457 if (obj == NULL) { 2458 str = ""; 2459 } else if (!_g1h->is_in_g1_reserved(obj)) { 2460 str = " O"; 2461 } else { 2462 HeapRegion* hr = _g1h->heap_region_containing(obj); 2463 guarantee(hr != NULL, "invariant"); 2464 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2465 bool marked = _g1h->is_marked(obj, _vo); 2466 2467 if (over_tams) { 2468 str = " >"; 2469 if (marked) { 2470 str2 = " AND MARKED"; 2471 } 2472 } else if (marked) { 2473 str = " M"; 2474 } else { 2475 str = " NOT"; 2476 } 2477 } 2478 2479 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2480 p, (void*) obj, str, str2); 2481 } 2482 }; 2483 2484 class PrintReachableObjectClosure : public ObjectClosure { 2485 private: 2486 G1CollectedHeap* _g1h; 2487 outputStream* _out; 2488 VerifyOption _vo; 2489 bool _all; 2490 HeapRegion* _hr; 2491 2492 public: 2493 PrintReachableObjectClosure(outputStream* out, 2494 VerifyOption vo, 2495 bool all, 2496 HeapRegion* hr) : 2497 _g1h(G1CollectedHeap::heap()), 2498 _out(out), _vo(vo), _all(all), _hr(hr) { } 2499 2500 void do_object(oop o) { 2501 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2502 bool marked = _g1h->is_marked(o, _vo); 2503 bool print_it = _all || over_tams || marked; 2504 2505 if (print_it) { 2506 _out->print_cr(" "PTR_FORMAT"%s", 2507 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2508 PrintReachableOopClosure oopCl(_out, _vo, _all); 2509 o->oop_iterate_no_header(&oopCl); 2510 } 2511 } 2512 }; 2513 2514 class PrintReachableRegionClosure : public HeapRegionClosure { 2515 private: 2516 G1CollectedHeap* _g1h; 2517 outputStream* _out; 2518 VerifyOption _vo; 2519 bool _all; 2520 2521 public: 2522 bool doHeapRegion(HeapRegion* hr) { 2523 HeapWord* b = hr->bottom(); 2524 HeapWord* e = hr->end(); 2525 HeapWord* t = hr->top(); 2526 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2527 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2528 "TAMS: "PTR_FORMAT, b, e, t, p); 2529 _out->cr(); 2530 2531 HeapWord* from = b; 2532 HeapWord* to = t; 2533 2534 if (to > from) { 2535 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2536 _out->cr(); 2537 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2538 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2539 _out->cr(); 2540 } 2541 2542 return false; 2543 } 2544 2545 PrintReachableRegionClosure(outputStream* out, 2546 VerifyOption vo, 2547 bool all) : 2548 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2549 }; 2550 2551 void ConcurrentMark::print_reachable(const char* str, 2552 VerifyOption vo, 2553 bool all) { 2554 gclog_or_tty->cr(); 2555 gclog_or_tty->print_cr("== Doing heap dump... "); 2556 2557 if (G1PrintReachableBaseFile == NULL) { 2558 gclog_or_tty->print_cr(" #### error: no base file defined"); 2559 return; 2560 } 2561 2562 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2563 (JVM_MAXPATHLEN - 1)) { 2564 gclog_or_tty->print_cr(" #### error: file name too long"); 2565 return; 2566 } 2567 2568 char file_name[JVM_MAXPATHLEN]; 2569 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2570 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2571 2572 fileStream fout(file_name); 2573 if (!fout.is_open()) { 2574 gclog_or_tty->print_cr(" #### error: could not open file"); 2575 return; 2576 } 2577 2578 outputStream* out = &fout; 2579 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2580 out->cr(); 2581 2582 out->print_cr("--- ITERATING OVER REGIONS"); 2583 out->cr(); 2584 PrintReachableRegionClosure rcl(out, vo, all); 2585 _g1h->heap_region_iterate(&rcl); 2586 out->cr(); 2587 2588 gclog_or_tty->print_cr(" done"); 2589 gclog_or_tty->flush(); 2590 } 2591 2592 #endif // PRODUCT 2593 2594 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2595 // Note we are overriding the read-only view of the prev map here, via 2596 // the cast. 2597 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2598 } 2599 2600 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2601 _nextMarkBitMap->clearRange(mr); 2602 } 2603 2604 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2605 clearRangePrevBitmap(mr); 2606 clearRangeNextBitmap(mr); 2607 } 2608 2609 HeapRegion* 2610 ConcurrentMark::claim_region(int task_num) { 2611 // "checkpoint" the finger 2612 HeapWord* finger = _finger; 2613 2614 // _heap_end will not change underneath our feet; it only changes at 2615 // yield points. 2616 while (finger < _heap_end) { 2617 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2618 2619 // Note on how this code handles humongous regions. In the 2620 // normal case the finger will reach the start of a "starts 2621 // humongous" (SH) region. 
Its end will either be the end of the 2622 // last "continues humongous" (CH) region in the sequence, or the 2623 // standard end of the SH region (if the SH is the only region in 2624 // the sequence). That way claim_region() will skip over the CH 2625 // regions. However, there is a subtle race between a CM thread 2626 // executing this method and a mutator thread doing a humongous 2627 // object allocation. The two are not mutually exclusive as the CM 2628 // thread does not need to hold the Heap_lock when it gets 2629 // here. So there is a chance that claim_region() will come across 2630 // a free region that's in the process of becoming a SH or a CH 2631 // region. In the former case, it will either 2632 // a) Miss the update to the region's end, in which case it will 2633 // visit every subsequent CH region, will find their bitmaps 2634 // empty, and do nothing, or 2635 // b) Observe the update of the region's end (in which case 2636 // it will skip the subsequent CH regions). 2637 // If it comes across a region that suddenly becomes CH, the 2638 // scenario will be similar to b). So, the race between 2639 // claim_region() and a humongous object allocation might force us 2640 // to do a bit of unnecessary work (due to some unnecessary bitmap 2641 // iterations) but it should not introduce any correctness issues. 2642 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2643 HeapWord* bottom = curr_region->bottom(); 2644 HeapWord* end = curr_region->end(); 2645 HeapWord* limit = curr_region->next_top_at_mark_start(); 2646 2647 if (verbose_low()) { 2648 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " 2649 "["PTR_FORMAT", "PTR_FORMAT"), " 2650 "limit = "PTR_FORMAT, 2651 task_num, curr_region, bottom, end, limit); 2652 } 2653 2654 // Is the gap between reading the finger and doing the CAS too long? 2655 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2656 if (res == finger) { 2657 // we succeeded 2658 2659 // notice that _finger == end cannot be guaranteed here since 2660 // someone else might have moved the finger even further 2661 assert(_finger >= end, "the finger should have moved forward"); 2662 2663 if (verbose_low()) { 2664 gclog_or_tty->print_cr("[%d] we were successful with region = " 2665 PTR_FORMAT, task_num, curr_region); 2666 } 2667 2668 if (limit > bottom) { 2669 if (verbose_low()) { 2670 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " 2671 "returning it ", task_num, curr_region); 2672 } 2673 return curr_region; 2674 } else { 2675 assert(limit == bottom, 2676 "the region limit should be at bottom"); 2677 if (verbose_low()) { 2678 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " 2679 "returning NULL", task_num, curr_region); 2680 } 2681 // we return NULL and the caller should try calling 2682 // claim_region() again.
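// The claim protocol above, compressed (illustrative):
//
//   HeapWord* finger = _finger;                                  // snapshot
//   HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
//   if (res == finger) { /* we own [finger, end); scan it if non-empty */ }
//   else               { finger = _finger; /* lost the race; retry */ }
//
// At most one task's CAS can succeed for a given value of the finger, so
// each region is claimed exactly once without locking.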
2683 return NULL; 2684 } 2685 } else { 2686 assert(_finger > finger, "the finger should have moved forward"); 2687 if (verbose_low()) { 2688 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2689 "global finger = "PTR_FORMAT", " 2690 "our finger = "PTR_FORMAT, 2691 task_num, _finger, finger); 2692 } 2693 2694 // read it again 2695 finger = _finger; 2696 } 2697 } 2698 2699 return NULL; 2700 } 2701 2702 #ifndef PRODUCT 2703 enum VerifyNoCSetOopsPhase { 2704 VerifyNoCSetOopsStack, 2705 VerifyNoCSetOopsQueues, 2706 VerifyNoCSetOopsSATBCompleted, 2707 VerifyNoCSetOopsSATBThread 2708 }; 2709 2710 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2711 private: 2712 G1CollectedHeap* _g1h; 2713 VerifyNoCSetOopsPhase _phase; 2714 int _info; 2715 2716 const char* phase_str() { 2717 switch (_phase) { 2718 case VerifyNoCSetOopsStack: return "Stack"; 2719 case VerifyNoCSetOopsQueues: return "Queue"; 2720 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2721 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2722 default: ShouldNotReachHere(); 2723 } 2724 return NULL; 2725 } 2726 2727 void do_object_work(oop obj) { 2728 guarantee(!_g1h->obj_in_cs(obj), 2729 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2730 (void*) obj, phase_str(), _info)); 2731 } 2732 2733 public: 2734 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2735 2736 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2737 _phase = phase; 2738 _info = info; 2739 } 2740 2741 virtual void do_oop(oop* p) { 2742 oop obj = oopDesc::load_decode_heap_oop(p); 2743 do_object_work(obj); 2744 } 2745 2746 virtual void do_oop(narrowOop* p) { 2747 // We should not come across narrow oops while scanning marking 2748 // stacks and SATB buffers. 2749 ShouldNotReachHere(); 2750 } 2751 2752 virtual void do_object(oop obj) { 2753 do_object_work(obj); 2754 } 2755 }; 2756 2757 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2758 bool verify_enqueued_buffers, 2759 bool verify_thread_buffers, 2760 bool verify_fingers) { 2761 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2762 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2763 return; 2764 } 2765 2766 VerifyNoCSetOopsClosure cl; 2767 2768 if (verify_stacks) { 2769 // Verify entries on the global mark stack 2770 cl.set_phase(VerifyNoCSetOopsStack); 2771 _markStack.oops_do(&cl); 2772 2773 // Verify entries on the task queues 2774 for (int i = 0; i < (int) _max_task_num; i += 1) { 2775 cl.set_phase(VerifyNoCSetOopsQueues, i); 2776 OopTaskQueue* queue = _task_queues->queue(i); 2777 queue->oops_do(&cl); 2778 } 2779 } 2780 2781 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2782 2783 // Verify entries on the enqueued SATB buffers 2784 if (verify_enqueued_buffers) { 2785 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2786 satb_qs.iterate_completed_buffers_read_only(&cl); 2787 } 2788 2789 // Verify entries on the per-thread SATB buffers 2790 if (verify_thread_buffers) { 2791 cl.set_phase(VerifyNoCSetOopsSATBThread); 2792 satb_qs.iterate_thread_buffers_read_only(&cl); 2793 } 2794 2795 if (verify_fingers) { 2796 // Verify the global finger 2797 HeapWord* global_finger = finger(); 2798 if (global_finger != NULL && global_finger < _heap_end) { 2799 // The global finger always points to a heap region boundary. 
We 2800 // use heap_region_containing_raw() to get the containing region 2801 // given that the global finger could be pointing to a free region 2802 // which subsequently becomes continues humongous. If that 2803 // happens, heap_region_containing() will return the bottom of the 2804 // corresponding starts humongous region and the check below will 2805 // not hold any more. 2806 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2807 guarantee(global_finger == global_hr->bottom(), 2808 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2809 global_finger, HR_FORMAT_PARAMS(global_hr))); 2810 } 2811 2812 // Verify the task fingers 2813 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2814 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2815 CMTask* task = _tasks[i]; 2816 HeapWord* task_finger = task->finger(); 2817 if (task_finger != NULL && task_finger < _heap_end) { 2818 // See above note on the global finger verification. 2819 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2820 guarantee(task_finger == task_hr->bottom() || 2821 !task_hr->in_collection_set(), 2822 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2823 task_finger, HR_FORMAT_PARAMS(task_hr))); 2824 } 2825 } 2826 } 2827 } 2828 #endif // PRODUCT 2829 2830 void ConcurrentMark::clear_marking_state(bool clear_overflow) { 2831 _markStack.setEmpty(); 2832 _markStack.clear_overflow(); 2833 if (clear_overflow) { 2834 clear_has_overflown(); 2835 } else { 2836 assert(has_overflown(), "pre-condition"); 2837 } 2838 _finger = _heap_start; 2839 2840 for (int i = 0; i < (int)_max_task_num; ++i) { 2841 OopTaskQueue* queue = _task_queues->queue(i); 2842 queue->set_empty(); 2843 } 2844 } 2845 2846 // Aggregate the counting data that was constructed concurrently 2847 // with marking. 2848 class AggregateCountDataHRClosure: public HeapRegionClosure { 2849 ConcurrentMark* _cm; 2850 BitMap* _cm_card_bm; 2851 size_t _max_task_num; 2852 2853 public: 2854 AggregateCountDataHRClosure(ConcurrentMark *cm, 2855 BitMap* cm_card_bm, 2856 size_t max_task_num) : 2857 _cm(cm), _cm_card_bm(cm_card_bm), 2858 _max_task_num(max_task_num) { } 2859 2860 bool is_card_aligned(HeapWord* p) { 2861 return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0); 2862 } 2863 2864 bool doHeapRegion(HeapRegion* hr) { 2865 if (hr->continuesHumongous()) { 2866 // We will ignore these here and process them when their 2867 // associated "starts humongous" region is processed. 2868 // Note that we cannot rely on their associated 2869 // "starts humongous" region to have their bit set to 1 2870 // since, due to the region chunking in the parallel region 2871 // iteration, a "continues humongous" region might be visited 2872 // before its associated "starts humongous". 2873 return false; 2874 } 2875 2876 HeapWord* start = hr->bottom(); 2877 HeapWord* limit = hr->next_top_at_mark_start(); 2878 HeapWord* end = hr->end(); 2879 2880 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2881 err_msg("Preconditions not met - " 2882 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2883 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2884 start, limit, hr->top(), hr->end())); 2885 2886 assert(hr->next_marked_bytes() == 0, "Precondition"); 2887 2888 if (start == limit) { 2889 // NTAMS of this region has not been set so nothing to do. 
2890 return false; 2891 } 2892 2893 assert(is_card_aligned(start), "sanity"); 2894 assert(is_card_aligned(end), "sanity"); 2895 2896 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 2897 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 2898 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 2899 2900 // If ntams is not card aligned then we bump the index for 2901 // limit so that we get the card spanning ntams. 2902 if (!is_card_aligned(limit)) { 2903 limit_idx += 1; 2904 } 2905 2906 assert(limit_idx <= end_idx, "or else use atomics"); 2907 2908 // Aggregate the "stripe" in the count data associated with hr. 2909 uint hrs_index = hr->hrs_index(); 2910 size_t marked_bytes = 0; 2911 2912 for (int i = 0; (size_t)i < _max_task_num; i += 1) { 2913 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2914 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2915 2916 // Fetch the marked_bytes in this region for task i and 2917 // add it to the running total for this region. 2918 marked_bytes += marked_bytes_array[hrs_index]; 2919 2920 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx) 2921 // into the global card bitmap. 2922 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2923 2924 while (scan_idx < limit_idx) { 2925 assert(task_card_bm->at(scan_idx) == true, "should be"); 2926 _cm_card_bm->set_bit(scan_idx); 2927 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2928 2929 // BitMap::get_next_one_offset() can handle the case when 2930 // its left_offset parameter is greater than its right_offset 2931 // parameter. It does, however, have an early exit if 2932 // left_offset == right_offset. So let's limit the value 2933 // passed in for left offset here. 2934 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2935 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2936 } 2937 } 2938 2939 // Update the marked bytes for this region. 2940 hr->add_to_marked_bytes(marked_bytes); 2941 2942 // Next heap region 2943 return false; 2944 } 2945 }; 2946 2947 class G1AggregateCountDataTask: public AbstractGangTask { 2948 protected: 2949 G1CollectedHeap* _g1h; 2950 ConcurrentMark* _cm; 2951 BitMap* _cm_card_bm; 2952 size_t _max_task_num; 2953 int _active_workers; 2954 2955 public: 2956 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2957 ConcurrentMark* cm, 2958 BitMap* cm_card_bm, 2959 size_t max_task_num, 2960 int n_workers) : 2961 AbstractGangTask("Count Aggregation"), 2962 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2963 _max_task_num(max_task_num), 2964 _active_workers(n_workers) { } 2965 2966 void work(uint worker_id) { 2967 AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num); 2968 2969 if (G1CollectedHeap::use_parallel_gc_threads()) { 2970 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 2971 _active_workers, 2972 HeapRegion::AggregateCountClaimValue); 2973 } else { 2974 _g1h->heap_region_iterate(&cl); 2975 } 2976 } 2977 }; 2978 2979 2980 void ConcurrentMark::aggregate_count_data() { 2981 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 
2982 _g1h->workers()->active_workers() : 2983 1); 2984 2985 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2986 _max_task_num, n_workers); 2987 2988 if (G1CollectedHeap::use_parallel_gc_threads()) { 2989 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2990 "sanity check"); 2991 _g1h->set_par_threads(n_workers); 2992 _g1h->workers()->run_task(&g1_par_agg_task); 2993 _g1h->set_par_threads(0); 2994 2995 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 2996 "sanity check"); 2997 _g1h->reset_heap_region_claim_values(); 2998 } else { 2999 g1_par_agg_task.work(0); 3000 } 3001 } 3002 3003 // Clear the per-worker arrays used to store the per-region counting data 3004 void ConcurrentMark::clear_all_count_data() { 3005 // Clear the global card bitmap - it will be filled during 3006 // liveness count aggregation (during remark) and the 3007 // final counting task. 3008 _card_bm.clear(); 3009 3010 // Clear the global region bitmap - it will be filled as part 3011 // of the final counting task. 3012 _region_bm.clear(); 3013 3014 uint max_regions = _g1h->max_regions(); 3015 assert(_max_task_num != 0, "uninitialized"); 3016 3017 for (int i = 0; (size_t) i < _max_task_num; i += 1) { 3018 BitMap* task_card_bm = count_card_bitmap_for(i); 3019 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3020 3021 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3022 assert(marked_bytes_array != NULL, "uninitialized"); 3023 3024 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3025 task_card_bm->clear(); 3026 } 3027 } 3028 3029 void ConcurrentMark::print_stats() { 3030 if (verbose_stats()) { 3031 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3032 for (size_t i = 0; i < _active_tasks; ++i) { 3033 _tasks[i]->print_stats(); 3034 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3035 } 3036 } 3037 } 3038 3039 // abandon current marking iteration due to a Full GC 3040 void ConcurrentMark::abort() { 3041 // Clear all marks to force marking thread to do nothing 3042 _nextMarkBitMap->clearAll(); 3043 // Clear the liveness counting data 3044 clear_all_count_data(); 3045 // Empty mark stack 3046 clear_marking_state(); 3047 for (int i = 0; i < (int)_max_task_num; ++i) { 3048 _tasks[i]->clear_region_fields(); 3049 } 3050 _has_aborted = true; 3051 3052 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3053 satb_mq_set.abandon_partial_marking(); 3054 // This can be called either during or outside marking; we'll read 3055 // the expected_active value from the SATB queue set. 3056 satb_mq_set.set_active_all_threads( 3057 false, /* new active value */ 3058 satb_mq_set.is_active() /* expected_active */); 3059 } 3060 3061 static void print_ms_time_info(const char* prefix, const char* name, 3062 NumberSeq& ns) { 3063 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3064 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3065 if (ns.num() > 0) { 3066 gclog_or_tty->print_cr("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 3067 prefix, ns.sd(), ns.maximum()); 3068 } 3069 } 3070 3071 void ConcurrentMark::print_summary_info() { 3072 gclog_or_tty->print_cr(" Concurrent marking:"); 3073 print_ms_time_info(" ", "init marks", _init_times); 3074 print_ms_time_info(" ", "remarks", _remark_times); 3075 { 3076 print_ms_time_info(" ", "final marks", _remark_mark_times); 3077 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3078 3079 } 3080 print_ms_time_info(" ", "cleanups", _cleanup_times); 3081 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3082 _total_counting_time, 3083 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3084 (double)_cleanup_times.num() 3085 : 0.0)); 3086 if (G1ScrubRemSets) { 3087 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3088 _total_rs_scrub_time, 3089 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3090 (double)_cleanup_times.num() 3091 : 0.0)); 3092 } 3093 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3094 (_init_times.sum() + _remark_times.sum() + 3095 _cleanup_times.sum())/1000.0); 3096 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3097 "(%8.2f s marking).", 3098 cmThread()->vtime_accum(), 3099 cmThread()->vtime_mark_accum()); 3100 } 3101 3102 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3103 _parallel_workers->print_worker_threads_on(st); 3104 } 3105 3106 // We take a break if someone is trying to stop the world. 3107 bool ConcurrentMark::do_yield_check(uint worker_id) { 3108 if (should_yield()) { 3109 if (worker_id == 0) { 3110 _g1h->g1_policy()->record_concurrent_pause(); 3111 } 3112 cmThread()->yield(); 3113 return true; 3114 } else { 3115 return false; 3116 } 3117 } 3118 3119 bool ConcurrentMark::should_yield() { 3120 return cmThread()->should_yield(); 3121 } 3122 3123 bool ConcurrentMark::containing_card_is_marked(void* p) { 3124 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3125 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3126 } 3127 3128 bool ConcurrentMark::containing_cards_are_marked(void* start, 3129 void* last) { 3130 return containing_card_is_marked(start) && 3131 containing_card_is_marked(last); 3132 } 3133 3134 #ifndef PRODUCT 3135 // for debugging purposes 3136 void ConcurrentMark::print_finger() { 3137 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3138 _heap_start, _heap_end, _finger); 3139 for (int i = 0; i < (int) _max_task_num; ++i) { 3140 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3141 } 3142 gclog_or_tty->print_cr(""); 3143 } 3144 #endif 3145 3146 void CMTask::scan_object(oop obj) { 3147 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3148 3149 if (_cm->verbose_high()) { 3150 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3151 _task_id, (void*) obj); 3152 } 3153 3154 size_t obj_size = obj->size(); 3155 _words_scanned += obj_size; 3156 3157 obj->oop_iterate(_cm_oop_closure); 3158 statsOnly( ++_objs_scanned ); 3159 check_limits(); 3160 } 3161 3162 // Closure for iteration over bitmaps 3163 class CMBitMapClosure : public BitMapClosure { 3164 private: 3165 // the bitmap that is being iterated over 3166 CMBitMap* _nextMarkBitMap; 3167 ConcurrentMark* _cm; 3168 CMTask* _task; 3169 3170 public: 3171 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3172 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3173 3174 bool 
do_bit(size_t offset) { 3175 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3176 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3177 assert( addr < _cm->finger(), "invariant"); 3178 3179 statsOnly( _task->increase_objs_found_on_bitmap() ); 3180 assert(addr >= _task->finger(), "invariant"); 3181 3182 // We move that task's local finger along. 3183 _task->move_finger_to(addr); 3184 3185 _task->scan_object(oop(addr)); 3186 // we only partially drain the local queue and global stack 3187 _task->drain_local_queue(true); 3188 _task->drain_global_stack(true); 3189 3190 // if the has_aborted flag has been raised, we need to bail out of 3191 // the iteration 3192 return !_task->has_aborted(); 3193 } 3194 }; 3195 3196 // Closure for iterating over objects, currently only used for 3197 // processing SATB buffers. 3198 class CMObjectClosure : public ObjectClosure { 3199 private: 3200 CMTask* _task; 3201 3202 public: 3203 void do_object(oop obj) { 3204 _task->deal_with_reference(obj); 3205 } 3206 3207 CMObjectClosure(CMTask* task) : _task(task) { } 3208 }; 3209 3210 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3211 ConcurrentMark* cm, 3212 CMTask* task) 3213 : _g1h(g1h), _cm(cm), _task(task) { 3214 assert(_ref_processor == NULL, "should be initialized to NULL"); 3215 3216 if (G1UseConcMarkReferenceProcessing) { 3217 _ref_processor = g1h->ref_processor_cm(); 3218 assert(_ref_processor != NULL, "should not be NULL"); 3219 } 3220 } 3221 3222 void CMTask::setup_for_region(HeapRegion* hr) { 3223 // Separated the asserts so that we know which one fires. 3224 assert(hr != NULL, 3225 "claim_region() should have filtered out continues humongous regions"); 3226 assert(!hr->continuesHumongous(), 3227 "claim_region() should have filtered out continues humongous regions"); 3228 3229 if (_cm->verbose_low()) { 3230 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 3231 _task_id, hr); 3232 } 3233 3234 _curr_region = hr; 3235 _finger = hr->bottom(); 3236 update_region_limit(); 3237 } 3238 3239 void CMTask::update_region_limit() { 3240 HeapRegion* hr = _curr_region; 3241 HeapWord* bottom = hr->bottom(); 3242 HeapWord* limit = hr->next_top_at_mark_start(); 3243 3244 if (limit == bottom) { 3245 if (_cm->verbose_low()) { 3246 gclog_or_tty->print_cr("[%d] found an empty region " 3247 "["PTR_FORMAT", "PTR_FORMAT")", 3248 _task_id, bottom, limit); 3249 } 3250 // The region was collected underneath our feet. 3251 // We set the finger to bottom to ensure that the bitmap 3252 // iteration that will follow this will not do anything. 3253 // (this is not a condition that holds when we set the region up, 3254 // as the region is not supposed to be empty in the first place) 3255 _finger = bottom; 3256 } else if (limit >= _region_limit) { 3257 assert(limit >= _finger, "peace of mind"); 3258 } else { 3259 assert(limit < _region_limit, "only way to get here"); 3260 // This can happen under some pretty unusual circumstances. An 3261 // evacuation pause empties the region underneath our feet (NTAMS 3262 // at bottom). We then do some allocation in the region (NTAMS 3263 // stays at bottom), followed by the region being used as a GC 3264 // alloc region (NTAMS will move to top() and the objects 3265 // originally below it will be grayed). All objects now marked in 3266 // the region are explicitly grayed, if below the global finger, 3267 // and we do not need in fact to scan anything else. 
So, we simply 3268 // set _finger to be limit to ensure that the bitmap iteration 3269 // doesn't do anything. 3270 _finger = limit; 3271 } 3272 3273 _region_limit = limit; 3274 } 3275 3276 void CMTask::giveup_current_region() { 3277 assert(_curr_region != NULL, "invariant"); 3278 if (_cm->verbose_low()) { 3279 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 3280 _task_id, _curr_region); 3281 } 3282 clear_region_fields(); 3283 } 3284 3285 void CMTask::clear_region_fields() { 3286 // Values for these three fields that indicate that we're not 3287 // holding on to a region. 3288 _curr_region = NULL; 3289 _finger = NULL; 3290 _region_limit = NULL; 3291 } 3292 3293 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3294 if (cm_oop_closure == NULL) { 3295 assert(_cm_oop_closure != NULL, "invariant"); 3296 } else { 3297 assert(_cm_oop_closure == NULL, "invariant"); 3298 } 3299 _cm_oop_closure = cm_oop_closure; 3300 } 3301 3302 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3303 guarantee(nextMarkBitMap != NULL, "invariant"); 3304 3305 if (_cm->verbose_low()) { 3306 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3307 } 3308 3309 _nextMarkBitMap = nextMarkBitMap; 3310 clear_region_fields(); 3311 3312 _calls = 0; 3313 _elapsed_time_ms = 0.0; 3314 _termination_time_ms = 0.0; 3315 _termination_start_time_ms = 0.0; 3316 3317 #if _MARKING_STATS_ 3318 _local_pushes = 0; 3319 _local_pops = 0; 3320 _local_max_size = 0; 3321 _objs_scanned = 0; 3322 _global_pushes = 0; 3323 _global_pops = 0; 3324 _global_max_size = 0; 3325 _global_transfers_to = 0; 3326 _global_transfers_from = 0; 3327 _regions_claimed = 0; 3328 _objs_found_on_bitmap = 0; 3329 _satb_buffers_processed = 0; 3330 _steal_attempts = 0; 3331 _steals = 0; 3332 _aborted = 0; 3333 _aborted_overflow = 0; 3334 _aborted_cm_aborted = 0; 3335 _aborted_yield = 0; 3336 _aborted_timed_out = 0; 3337 _aborted_satb = 0; 3338 _aborted_termination = 0; 3339 #endif // _MARKING_STATS_ 3340 } 3341 3342 bool CMTask::should_exit_termination() { 3343 regular_clock_call(); 3344 // This is called when we are in the termination protocol. We should 3345 // quit if, for some reason, this task wants to abort or the global 3346 // stack is not empty (this means that we can get work from it). 3347 return !_cm->mark_stack_empty() || has_aborted(); 3348 } 3349 3350 void CMTask::reached_limit() { 3351 assert(_words_scanned >= _words_scanned_limit || 3352 _refs_reached >= _refs_reached_limit , 3353 "shouldn't have been called otherwise"); 3354 regular_clock_call(); 3355 } 3356 3357 void CMTask::regular_clock_call() { 3358 if (has_aborted()) return; 3359 3360 // First, we need to recalculate the words scanned and refs reached 3361 // limits for the next clock call. 3362 recalculate_limits(); 3363 3364 // During the regular clock call we do the following 3365 3366 // (1) If an overflow has been flagged, then we abort. 3367 if (_cm->has_overflown()) { 3368 set_has_aborted(); 3369 return; 3370 } 3371 3372 // If we are not concurrent (i.e. we're doing remark) we don't need 3373 // to check anything else. The other steps are only needed during 3374 // the concurrent marking phase. 3375 if (!concurrent()) return; 3376 3377 // (2) If marking has been aborted for Full GC, then we also abort. 3378 if (_cm->has_aborted()) { 3379 set_has_aborted(); 3380 statsOnly( ++_aborted_cm_aborted ); 3381 return; 3382 } 3383 3384 double curr_time_ms = os::elapsedVTime() * 1000.0; 3385 3386 // (3) If marking stats are enabled, then we update the step history. 
void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}
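// Worked example (figures are illustrative; the actual periods are
// defined on this class): if words_scanned_period were 12*1024 words
// and _real_words_scanned_limit were 100*1024, the call above lowers
// _words_scanned_limit to 100*1024 - 9*1024 = 91*1024. Since the real
// limit was set a full period past the last recalculation point, at
// most a quarter of a period of additional scanning remains before
// the next regular_clock_call().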
void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}
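// Hedged sketch (comment only; "task" is a placeholder) of the
// transfer protocol the two methods above implement: entries move
// between a task's local queue and the mutex-protected global mark
// stack in chunks of global_stack_transfer_size, never one at a time,
// so the global stack is touched infrequently:
//
//   if (local_queue_too_long)  task->move_entries_to_global_stack();
//   if (local_queue_too_short) task->get_entries_from_global_stack();
//
// Both directions finish with decrease_limits(), so the regular clock
// runs soon after the expensive operation.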
void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
                             _task_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
    }
  }
}
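// Hedged sketch (comment only) of how the two drain targets interact
// during a marking step: partial drains keep some work visible for
// stealing, while total drains are reserved for the very end:
//
//   drain_local_queue(true);    // down to min(max_elems/3,
//                               //   GCDrainStackTargetSize) entries
//   drain_global_stack(true);   // down to the partial target
//   ...
//   drain_local_queue(false);   // down to 0, only when out of regions
//   drain_global_stack(false);  // down to 0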
// The SATB queue set makes several assumptions about whether to call
// the par or non-par versions of the methods. This is why some of the
// code is replicated. We should really get rid of the single-threaded
// version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // Again, this was a potentially expensive operation, so decrease
  // the limits to get the regular clock call early.
  decrease_limits();
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb,
                         _aborted_termination);
#endif // _MARKING_STATS_
}
/*****************************************************************************

    The do_marking_step(time_target_ms) method is the building block
    of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before it
    needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it needs to be also
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work will a task totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and finer-grained
      interaction with it might cause contention. If it overflows,
      then the marking phase should restart and iterate over the
      bitmap to identify gray objects. Throughout the marking phase,
      tasks attempt to keep the global mark stack at a small length
      but not totally empty, so that entries are available for popping
      by other tasks. Only when there is no more work will tasks
      totally drain the global mark stack.

      (4) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
      (local queues, stacks, fingers etc.) are re-initialised so that
      when do_marking_step() completes, the marking phase can
      immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield then we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reach a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.

 *****************************************************************************/
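// A minimal usage sketch (comment only; the real drivers live in the
// concurrent marking and remark code, and the time target below is an
// assumed value) of the call-in-a-loop contract described above:
//
//   CMTask* task = ...;  // a claimed, set-up task
//   do {
//     task->do_marking_step(10.0 /* assumed ms target */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // on abort: possibly yield or handle overflow, then loop;
//     // see the abort cases enumerated above
//   } while (task->has_aborted());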
void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);
      }

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task. The
      // marking framework will restart it and we can hopefully find
      // more things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            " %-4s"
#define G1PPRL_TYPE_H_FORMAT          " %4s"
#define G1PPRL_BYTE_FORMAT            " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          " %9s"
#define G1PPRL_DOUBLE_FORMAT          " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
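// Illustrative example (addresses and sizes are made up, and column
// spacing is approximate) of the "###"-prefixed per-region output the
// macros above compose:
//
//   ### type         address-range       used  prev-live  next-live  gc-eff
//   ###                                (bytes)    (bytes)    (bytes)  (bytes/ms)
//   ###  OLD  0xf0000000-0xf0100000   1048576     524288     262144       123.4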
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// It takes as a parameter a reference to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}
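// Worked example (figures are illustrative): with a 1 MB region size
// (HeapRegion::GrainBytes) and a humongous series whose "starts
// humongous" region recorded _hum_used_bytes = 2.5 MB, three calls to
// the single-field get_hum_bytes() above return 1 MB, 1 MB and 0.5 MB
// respectively, leaving the field at 0. Each region in the series is
// thus attributed at most one region's worth of the series totals.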
bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}