/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
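  // Sizing: each bitmap bit covers 2^_shifter heap words, so the bitmap
  // needs _bmWordSize >> _shifter bits, i.e.
  // _bmWordSize >> (_shifter + LogBitsPerByte) bytes (+1 to round up).
  // For example, assuming 8-byte HeapWords and _shifter == 0, a 1 GB heap
  // needs 2^27 bits == 16 MB of backing store per bitmap.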
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
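
// Concurrent marking mark stack
//
// A fixed-capacity, C-heap-allocated stack of grey oops. Single-element
// pushes claim a slot with a CAS on _index (par_push, par_adjoin_arr),
// while the bulk operations (par_push_arr, par_pop_arr) serialize on
// ParGCRareEvent_lock. On overflow the stack does not grow; it sets
// _overflow and leaves it to the caller to restart marking.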

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
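  // Claim [start, start + n) by bumping _index while still holding the
  // lock, then copy the n entries into the claimed slots.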
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
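  // Claiming uses double-checked locking: read _next_survivor once
  // without the lock and, only if it looks non-NULL, re-read it under
  // RootRegionScan_lock before advancing it past the claimed region.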
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
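  // For example, assuming the default 512-byte cards (card_shift == 9),
  // a heap reserved at 0x80000000 gives a bias of 0x80000000 >> 9 ==
  // 0x400000; index i in an accounting card bitmap then corresponds to
  // absolute card number _heap_bottom_card_num + i.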
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
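      // scale_parallel_threads() returns MAX2((n + 2) / 4, 1U), so e.g.
      // 4 parallel GC threads yield 1 marking thread, 8 yield 2, and
      // 13 yield 3.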
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. Any future expansions of g1_committed will be notified to
  // CM at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_task_num; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end   = _nextMarkBitMap->endWord();
  HeapWord* cur   = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the number of remaining forced overflows decreases
  // at every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all threads
  // to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended by a Full GC, or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let task 0 do this
    if (task_num == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

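    // The loop below runs marking steps until the concurrent phase
    // finishes or marking aborts. If a step aborts because it exhausted
    // its time budget, the task sleeps for elapsed_vtime * sleep_factor
    // to keep marking overhead near the configured target.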
    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
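  // Scan every object in [bottom, top), applying the closure to each
  // reference field. The assert below checks that NTAMS is at bottom(),
  // i.e. every object in a survivor root region lies above NTAMS and is
  // therefore implicitly live.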
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(overflow)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UsePrevMarking);
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all threads
    // to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It
    // means we missed accounting for some objects during the actual
    // marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};


class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
1602 if (ntams < top) { 1603 // This definitely means the region has live objects. 1604 set_bit_for_region(hr); 1605 1606 // Now set the bits in the card bitmap for [ntams, top) 1607 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1608 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1609 1610 // Note: if we're looking at the last region in heap - top 1611 // could be actually just beyond the end of the heap; end_idx 1612 // will then correspond to a (non-existent) card that is also 1613 // just beyond the heap. 1614 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1615 // end of object is not card aligned - increment to cover 1616 // all the cards spanned by the object 1617 end_idx += 1; 1618 } 1619 1620 assert(end_idx <= _card_bm->size(), 1621 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1622 end_idx, _card_bm->size())); 1623 assert(start_idx < _card_bm->size(), 1624 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1625 start_idx, _card_bm->size())); 1626 1627 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1628 } 1629 1630 // Set the bit for the region if it contains live data 1631 if (hr->next_marked_bytes() > 0) { 1632 set_bit_for_region(hr); 1633 } 1634 1635 return false; 1636 } 1637 }; 1638 1639 class G1ParFinalCountTask: public AbstractGangTask { 1640 protected: 1641 G1CollectedHeap* _g1h; 1642 ConcurrentMark* _cm; 1643 BitMap* _actual_region_bm; 1644 BitMap* _actual_card_bm; 1645 1646 uint _n_workers; 1647 1648 public: 1649 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1650 : AbstractGangTask("G1 final counting"), 1651 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1652 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1653 _n_workers(0) { 1654 // Use the value already set as the number of active threads 1655 // in the call to run_task(). 
1656 if (G1CollectedHeap::use_parallel_gc_threads()) { 1657 assert( _g1h->workers()->active_workers() > 0, 1658 "Should have been previously set"); 1659 _n_workers = _g1h->workers()->active_workers(); 1660 } else { 1661 _n_workers = 1; 1662 } 1663 } 1664 1665 void work(uint worker_id) { 1666 assert(worker_id < _n_workers, "invariant"); 1667 1668 FinalCountDataUpdateClosure final_update_cl(_g1h, 1669 _actual_region_bm, 1670 _actual_card_bm); 1671 1672 if (G1CollectedHeap::use_parallel_gc_threads()) { 1673 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1674 worker_id, 1675 _n_workers, 1676 HeapRegion::FinalCountClaimValue); 1677 } else { 1678 _g1h->heap_region_iterate(&final_update_cl); 1679 } 1680 } 1681 }; 1682 1683 class G1ParNoteEndTask; 1684 1685 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1686 G1CollectedHeap* _g1; 1687 int _worker_num; 1688 size_t _max_live_bytes; 1689 uint _regions_claimed; 1690 size_t _freed_bytes; 1691 FreeRegionList* _local_cleanup_list; 1692 OldRegionSet* _old_proxy_set; 1693 HumongousRegionSet* _humongous_proxy_set; 1694 HRRSCleanupTask* _hrrs_cleanup_task; 1695 double _claimed_region_time; 1696 double _max_region_time; 1697 1698 public: 1699 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1700 int worker_num, 1701 FreeRegionList* local_cleanup_list, 1702 OldRegionSet* old_proxy_set, 1703 HumongousRegionSet* humongous_proxy_set, 1704 HRRSCleanupTask* hrrs_cleanup_task) : 1705 _g1(g1), _worker_num(worker_num), 1706 _max_live_bytes(0), _regions_claimed(0), 1707 _freed_bytes(0), 1708 _claimed_region_time(0.0), _max_region_time(0.0), 1709 _local_cleanup_list(local_cleanup_list), 1710 _old_proxy_set(old_proxy_set), 1711 _humongous_proxy_set(humongous_proxy_set), 1712 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1713 1714 size_t freed_bytes() { return _freed_bytes; } 1715 1716 bool doHeapRegion(HeapRegion *hr) { 1717 if (hr->continuesHumongous()) { 1718 return false; 1719 } 1720 // We use a claim value of zero here because all regions 1721 // were claimed with value 1 in the FinalCount task. 
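// (Sketch of the claim protocol assumed here: during
// heap_region_par_iterate_chunked() each worker walks a chunk of the
// region sequence and claims a region by atomically changing its
// claim value to the phase-specific one - NoteEndClaimValue for this
// phase - so only the claiming worker processes it. This is also why
// each parallel phase asserts check_heap_region_claim_values() once
// its gang task has completed.)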
1722 _g1->reset_gc_time_stamps(hr); 1723 double start = os::elapsedTime(); 1724 _regions_claimed++; 1725 hr->note_end_of_marking(); 1726 _max_live_bytes += hr->max_live_bytes(); 1727 _g1->free_region_if_empty(hr, 1728 &_freed_bytes, 1729 _local_cleanup_list, 1730 _old_proxy_set, 1731 _humongous_proxy_set, 1732 _hrrs_cleanup_task, 1733 true /* par */); 1734 double region_time = (os::elapsedTime() - start); 1735 _claimed_region_time += region_time; 1736 if (region_time > _max_region_time) { 1737 _max_region_time = region_time; 1738 } 1739 return false; 1740 } 1741 1742 size_t max_live_bytes() { return _max_live_bytes; } 1743 uint regions_claimed() { return _regions_claimed; } 1744 double claimed_region_time_sec() { return _claimed_region_time; } 1745 double max_region_time_sec() { return _max_region_time; } 1746 }; 1747 1748 class G1ParNoteEndTask: public AbstractGangTask { 1749 friend class G1NoteEndOfConcMarkClosure; 1750 1751 protected: 1752 G1CollectedHeap* _g1h; 1753 size_t _max_live_bytes; 1754 size_t _freed_bytes; 1755 FreeRegionList* _cleanup_list; 1756 1757 public: 1758 G1ParNoteEndTask(G1CollectedHeap* g1h, 1759 FreeRegionList* cleanup_list) : 1760 AbstractGangTask("G1 note end"), _g1h(g1h), 1761 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1762 1763 void work(uint worker_id) { 1764 double start = os::elapsedTime(); 1765 FreeRegionList local_cleanup_list("Local Cleanup List"); 1766 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1767 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1768 HRRSCleanupTask hrrs_cleanup_task; 1769 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1770 &old_proxy_set, 1771 &humongous_proxy_set, 1772 &hrrs_cleanup_task); 1773 if (G1CollectedHeap::use_parallel_gc_threads()) { 1774 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1775 _g1h->workers()->active_workers(), 1776 HeapRegion::NoteEndClaimValue); 1777 } else { 1778 _g1h->heap_region_iterate(&g1_note_end); 1779 } 1780 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1781 1782 // Now update the lists 1783 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1784 NULL /* free_list */, 1785 &old_proxy_set, 1786 &humongous_proxy_set, 1787 true /* par */); 1788 { 1789 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1790 _max_live_bytes += g1_note_end.max_live_bytes(); 1791 _freed_bytes += g1_note_end.freed_bytes(); 1792 1793 // If we iterate over the global cleanup list at the end of 1794 // cleanup to do this printing we will not guarantee to only 1795 // generate output for the newly-reclaimed regions (the list 1796 // might not be empty at the beginning of cleanup; we might 1797 // still be working on its previous contents). So we do the 1798 // printing here, before we append the new regions to the global 1799 // cleanup list. 
1800 1801 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1802 if (hr_printer->is_active()) { 1803 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1804 while (iter.more_available()) { 1805 HeapRegion* hr = iter.get_next(); 1806 hr_printer->cleanup(hr); 1807 } 1808 } 1809 1810 _cleanup_list->add_as_tail(&local_cleanup_list); 1811 assert(local_cleanup_list.is_empty(), "post-condition"); 1812 1813 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1814 } 1815 } 1816 size_t max_live_bytes() { return _max_live_bytes; } 1817 size_t freed_bytes() { return _freed_bytes; } 1818 }; 1819 1820 class G1ParScrubRemSetTask: public AbstractGangTask { 1821 protected: 1822 G1RemSet* _g1rs; 1823 BitMap* _region_bm; 1824 BitMap* _card_bm; 1825 public: 1826 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1827 BitMap* region_bm, BitMap* card_bm) : 1828 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1829 _region_bm(region_bm), _card_bm(card_bm) { } 1830 1831 void work(uint worker_id) { 1832 if (G1CollectedHeap::use_parallel_gc_threads()) { 1833 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1834 HeapRegion::ScrubRemSetClaimValue); 1835 } else { 1836 _g1rs->scrub(_region_bm, _card_bm); 1837 } 1838 } 1839 1840 }; 1841 1842 void ConcurrentMark::cleanup() { 1843 // world is stopped at this checkpoint 1844 assert(SafepointSynchronize::is_at_safepoint(), 1845 "world should be stopped"); 1846 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1847 1848 // If a full collection has happened, we shouldn't do this. 1849 if (has_aborted()) { 1850 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1851 return; 1852 } 1853 1854 HRSPhaseSetter x(HRSPhaseCleanup); 1855 g1h->verify_region_sets_optional(); 1856 1857 if (VerifyDuringGC) { 1858 HandleMark hm; // handle scope 1859 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1860 Universe::heap()->prepare_for_verify(); 1861 Universe::verify(/* silent */ false, 1862 /* option */ VerifyOption_G1UsePrevMarking); 1863 } 1864 1865 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1866 g1p->record_concurrent_mark_cleanup_start(); 1867 1868 double start = os::elapsedTime(); 1869 1870 HeapRegionRemSet::reset_for_cleanup_tasks(); 1871 1872 uint n_workers; 1873 1874 // Do counting once more with the world stopped for good measure. 1875 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1876 1877 if (G1CollectedHeap::use_parallel_gc_threads()) { 1878 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1879 "sanity check"); 1880 1881 g1h->set_par_threads(); 1882 n_workers = g1h->n_par_threads(); 1883 assert(g1h->n_par_threads() == n_workers, 1884 "Should not have been reset"); 1885 g1h->workers()->run_task(&g1_par_count_task); 1886 // Done with the parallel phase so reset to 0. 1887 g1h->set_par_threads(0); 1888 1889 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1890 "sanity check"); 1891 } else { 1892 n_workers = 1; 1893 g1_par_count_task.work(0); 1894 } 1895 1896 if (VerifyDuringGC) { 1897 // Verify that the counting data accumulated during marking matches 1898 // that calculated by walking the marking bitmap. 
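// In outline: G1ParVerifyFinalCountTask recomputes an "expected"
// region bitmap and card bitmap from the mark bitmap into the two
// fresh BitMaps below, and VerifyLiveObjectDataHRClosure compares
// them (plus the per-region marked-byte counts) against the actual
// _region_bm / _card_bm, accumulating a failure count that the
// guarantee() at the end of this block requires to be zero.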
1899
1900 // Bitmaps to hold expected values
1901 BitMap expected_region_bm(_region_bm.size(), false);
1902 BitMap expected_card_bm(_card_bm.size(), false);
1903
1904 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1905 &_region_bm,
1906 &_card_bm,
1907 &expected_region_bm,
1908 &expected_card_bm);
1909
1910 if (G1CollectedHeap::use_parallel_gc_threads()) {
1911 g1h->set_par_threads((int)n_workers);
1912 g1h->workers()->run_task(&g1_par_verify_task);
1913 // Done with the parallel phase so reset to 0.
1914 g1h->set_par_threads(0);
1915
1916 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1917 "sanity check");
1918 } else {
1919 g1_par_verify_task.work(0);
1920 }
1921
1922 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1923 }
1924
1925 size_t start_used_bytes = g1h->used();
1926 g1h->set_marking_complete();
1927
1928 double count_end = os::elapsedTime();
1929 double this_final_counting_time = (count_end - start);
1930 _total_counting_time += this_final_counting_time;
1931
1932 if (G1PrintRegionLivenessInfo) {
1933 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1934 _g1h->heap_region_iterate(&cl);
1935 }
1936
1937 // Install newly created mark bitmap as "prev".
1938 swapMarkBitMaps();
1939
1940 g1h->reset_gc_time_stamp();
1941
1942 // Note end of marking in all heap regions.
1943 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1944 if (G1CollectedHeap::use_parallel_gc_threads()) {
1945 g1h->set_par_threads((int)n_workers);
1946 g1h->workers()->run_task(&g1_par_note_end_task);
1947 g1h->set_par_threads(0);
1948
1949 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1950 "sanity check");
1951 } else {
1952 g1_par_note_end_task.work(0);
1953 }
1954 g1h->check_gc_time_stamps();
1955
1956 if (!cleanup_list_is_empty()) {
1957 // The cleanup list is not empty, so we'll have to process it
1958 // concurrently. Notify anyone else that might be wanting free
1959 // regions that there will be more free regions coming soon.
1960 g1h->set_free_regions_coming();
1961 }
1962
1963 // Scrub the rem sets before the record_concurrent_mark_cleanup_end() call
1964 // below, since it affects the metric by which we sort the heap regions.
1965 if (G1ScrubRemSets) {
1966 double rs_scrub_start = os::elapsedTime();
1967 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1968 if (G1CollectedHeap::use_parallel_gc_threads()) {
1969 g1h->set_par_threads((int)n_workers);
1970 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1971 g1h->set_par_threads(0);
1972
1973 assert(g1h->check_heap_region_claim_values(
1974 HeapRegion::ScrubRemSetClaimValue),
1975 "sanity check");
1976 } else {
1977 g1_par_scrub_rs_task.work(0);
1978 }
1979
1980 double rs_scrub_end = os::elapsedTime();
1981 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1982 _total_rs_scrub_time += this_rs_scrub_time;
1983 }
1984
1985 // this will also free any regions totally full of garbage objects,
1986 // and sort the regions.
1987 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1988
1989 // Statistics.
1990 double end = os::elapsedTime();
1991 _cleanup_times.add((end - start) * 1000.0);
1992
1993 if (G1Log::fine()) {
1994 g1h->print_size_transition(gclog_or_tty,
1995 start_used_bytes,
1996 g1h->used(),
1997 g1h->capacity());
1998 }
1999
2000 // Cleanup will have freed any regions completely full of garbage.
2001 // Update the soft reference policy with the new heap occupancy.
2002 Universe::update_heap_info_at_gc();
2003
2004 // We need to make this be a "collection" so any collection pause that
2005 // races with it goes around and waits for completeCleanup to finish.
2006 g1h->increment_total_collections();
2007
2008 // We reclaimed old regions so we should calculate the sizes to make
2009 // sure we update the old gen/space data.
2010 g1h->g1mm()->update_sizes();
2011
2012 if (VerifyDuringGC) {
2013 HandleMark hm; // handle scope
2014 gclog_or_tty->print(" VerifyDuringGC:(after)");
2015 Universe::heap()->prepare_for_verify();
2016 Universe::verify(/* silent */ false,
2017 /* option */ VerifyOption_G1UsePrevMarking);
2018 }
2019
2020 g1h->verify_region_sets_optional();
2021 g1h->trace_heap_after_concurrent_cycle();
2022 }
2023
2024 void ConcurrentMark::completeCleanup() {
2025 if (has_aborted()) return;
2026
2027 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2028
2029 _cleanup_list.verify_optional();
2030 FreeRegionList tmp_free_list("Tmp Free List");
2031
2032 if (G1ConcRegionFreeingVerbose) {
2033 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2034 "cleanup list has %u entries",
2035 _cleanup_list.length());
2036 }
2037
2038 // No one else should be accessing the _cleanup_list at this point,
2039 // so it's not necessary to take any locks
2040 while (!_cleanup_list.is_empty()) {
2041 HeapRegion* hr = _cleanup_list.remove_head();
2042 assert(hr != NULL, "the list was not empty");
2043 hr->par_clear();
2044 tmp_free_list.add_as_tail(hr);
2045
2046 // Instead of adding one region at a time to the secondary_free_list,
2047 // we accumulate them in the local list and move them a few at a
2048 // time. This also cuts down on the number of notify_all() calls
2049 // we do during this process. We'll also append the local list when
2050 // _cleanup_list is empty (which means we just removed the last
2051 // region from the _cleanup_list).
2052 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2053 _cleanup_list.is_empty()) {
2054 if (G1ConcRegionFreeingVerbose) {
2055 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2056 "appending %u entries to the secondary_free_list, "
2057 "cleanup list still has %u entries",
2058 tmp_free_list.length(),
2059 _cleanup_list.length());
2060 }
2061
2062 {
2063 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2064 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2065 SecondaryFreeList_lock->notify_all();
2066 }
2067
2068 if (G1StressConcRegionFreeing) {
2069 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2070 os::sleep(Thread::current(), (jlong) 1, false);
2071 }
2072 }
2073 }
2074 }
2075 assert(tmp_free_list.is_empty(), "post-condition");
2076 }
2077
2078 // Supporting Object and Oop closures for reference discovery
2079 // and processing during marking
2080
2081 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2082 HeapWord* addr = (HeapWord*)obj;
2083 return addr != NULL &&
2084 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2085 }
2086
2087 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2088 // Uses the CMTask associated with a worker thread (for serial reference
2089 // processing the CMTask for worker 0 is used) to preserve (mark) and
2090 // trace referent objects.
2091 //
2092 // Using the CMTask and embedded local queues avoids having the worker
2093 // threads operating on the global mark stack. This reduces the risk
2094 // of overflowing the stack - which we would rather avoid at this late
2095 // stage. Also, using the tasks' local queues removes the potential
2096 // for the workers to interfere with one another, which could occur if
2097 // they operated on the global stack.
2098
2099 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2100 ConcurrentMark* _cm;
2101 CMTask* _task;
2102 int _ref_counter_limit;
2103 int _ref_counter;
2104 bool _is_serial;
2105 public:
2106 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2107 _cm(cm), _task(task), _is_serial(is_serial),
2108 _ref_counter_limit(G1RefProcDrainInterval) {
2109 assert(_ref_counter_limit > 0, "sanity");
2110 assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
2111 _ref_counter = _ref_counter_limit;
2112 }
2113
2114 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2115 virtual void do_oop( oop* p) { do_oop_work(p); }
2116
2117 template <class T> void do_oop_work(T* p) {
2118 if (!_cm->has_overflown()) {
2119 oop obj = oopDesc::load_decode_heap_oop(p);
2120 if (_cm->verbose_high()) {
2121 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2122 "*"PTR_FORMAT" = "PTR_FORMAT,
2123 _task->task_id(), p, (void*) obj);
2124 }
2125
2126 _task->deal_with_reference(obj);
2127 _ref_counter--;
2128
2129 if (_ref_counter == 0) {
2130 // We have dealt with _ref_counter_limit references, pushing them
2131 // and objects reachable from them on to the local stack (and
2132 // possibly the global stack). Call CMTask::do_marking_step() to
2133 // process these entries.
2134 //
2135 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2136 // there's nothing more to do (i.e. we're done with the entries that
2137 // were pushed as a result of the CMTask::deal_with_reference() calls
2138 // above) or we overflow.
2139 //
2140 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2141 // flag while there may still be some work to do. (See the comment at
2142 // the beginning of CMTask::do_marking_step() for those conditions -
2143 // one of which is reaching the specified time target.) It is only
2144 // when CMTask::do_marking_step() returns without setting the
2145 // has_aborted() flag that the marking step has completed.
2146 do {
2147 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2148 _task->do_marking_step(mark_step_duration_ms,
2149 false /* do_termination */,
2150 _is_serial);
2151 } while (_task->has_aborted() && !_cm->has_overflown());
2152 _ref_counter = _ref_counter_limit;
2153 }
2154 } else {
2155 if (_cm->verbose_high()) {
2156 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2157 }
2158 }
2159 }
2160 };
2161
2162 // 'Drain' oop closure used by both serial and parallel reference processing.
2163 // Uses the CMTask associated with a given worker thread (for serial
2164 // reference processing the CMTask for worker 0 is used). Calls the
2165 // do_marking_step routine, with an unbelievably large timeout value,
2166 // to drain the marking data structures of the remaining entries
2167 // added by the 'keep alive' oop closure above.
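// A usage sketch (mirroring weakRefsWork() further down): the two
// closures are created in matching serial/parallel flavours and handed
// to the reference processor, which applies the 'keep alive' closure
// to reachable referents and the 'drain' closure to flush what that
// marking pushed:
//
//   G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), is_serial);
//   G1CMDrainMarkingStackClosure g1_drain(this, task(0), is_serial);
//   rp->process_discovered_references(&g1_is_alive, &g1_keep_alive,
//                                     &g1_drain, executor, timer);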
2168
2169 class G1CMDrainMarkingStackClosure: public VoidClosure {
2170 ConcurrentMark* _cm;
2171 CMTask* _task;
2172 bool _is_serial;
2173 public:
2174 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2175 _cm(cm), _task(task), _is_serial(is_serial) {
2176 assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
2177 }
2178
2179 void do_void() {
2180 do {
2181 if (_cm->verbose_high()) {
2182 gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step - serial: %s",
2183 _task->task_id(), BOOL_TO_STR(_is_serial));
2184 }
2185
2186 // We call CMTask::do_marking_step() to completely drain the local
2187 // and global marking stacks of entries pushed by the 'keep alive'
2188 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2189 //
2190 // CMTask::do_marking_step() is called in a loop, which we'll exit
2191 // if there's nothing more to do (i.e. we've completely drained the
2192 // entries that were pushed as a result of applying the 'keep alive'
2193 // closure to the entries on the discovered ref lists) or we overflow
2194 // the global marking stack.
2195 //
2196 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2197 // flag while there may still be some work to do. (See the comment at
2198 // the beginning of CMTask::do_marking_step() for those conditions -
2199 // one of which is reaching the specified time target.) It is only
2200 // when CMTask::do_marking_step() returns without setting the
2201 // has_aborted() flag that the marking step has completed.
2202
2203 _task->do_marking_step(1000000000.0 /* something very large */,
2204 true /* do_termination */,
2205 _is_serial);
2206 } while (_task->has_aborted() && !_cm->has_overflown());
2207 }
2208 };
2209
2210 // Implementation of AbstractRefProcTaskExecutor for parallel
2211 // reference processing at the end of G1 concurrent marking
2212
2213 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2214 private:
2215 G1CollectedHeap* _g1h;
2216 ConcurrentMark* _cm;
2217 WorkGang* _workers;
2218 int _active_workers;
2219
2220 public:
2221 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2222 ConcurrentMark* cm,
2223 WorkGang* workers,
2224 int n_workers) :
2225 _g1h(g1h), _cm(cm),
2226 _workers(workers), _active_workers(n_workers) { }
2227
2228 // Executes the given task using concurrent marking worker threads.
2229 virtual void execute(ProcessTask& task);
2230 virtual void execute(EnqueueTask& task);
2231 };
2232
2233 class G1CMRefProcTaskProxy: public AbstractGangTask {
2234 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2235 ProcessTask& _proc_task;
2236 G1CollectedHeap* _g1h;
2237 ConcurrentMark* _cm;
2238
2239 public:
2240 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2241 G1CollectedHeap* g1h,
2242 ConcurrentMark* cm) :
2243 AbstractGangTask("Process reference objects in parallel"),
2244 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2245 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2246 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2247 }
2248
2249 virtual void work(uint worker_id) {
2250 CMTask* task = _cm->task(worker_id);
2251 G1CMIsAliveClosure g1_is_alive(_g1h);
2252 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2253 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2254
2255 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2256 }
2257 };
2258
2259 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2260 assert(_workers != NULL, "Need parallel worker threads.");
2261 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2262
2263 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2264
2265 // We need to reset the concurrency level before each
2266 // proxy task execution, so that the termination protocol
2267 // and overflow handling in CMTask::do_marking_step() know
2268 // how many workers to wait for.
2269 _cm->set_concurrency(_active_workers);
2270 _g1h->set_par_threads(_active_workers);
2271 _workers->run_task(&proc_task_proxy);
2272 _g1h->set_par_threads(0);
2273 }
2274
2275 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2276 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2277 EnqueueTask& _enq_task;
2278
2279 public:
2280 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2281 AbstractGangTask("Enqueue reference objects in parallel"),
2282 _enq_task(enq_task) { }
2283
2284 virtual void work(uint worker_id) {
2285 _enq_task.work(worker_id);
2286 }
2287 };
2288
2289 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2290 assert(_workers != NULL, "Need parallel worker threads.");
2291 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2292
2293 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2294
2295 // Not strictly necessary but...
2296 //
2297 // We need to reset the concurrency level before each
2298 // proxy task execution, so that the termination protocol
2299 // and overflow handling in CMTask::do_marking_step() know
2300 // how many workers to wait for.
2301 _cm->set_concurrency(_active_workers);
2302 _g1h->set_par_threads(_active_workers);
2303 _workers->run_task(&enq_task_proxy);
2304 _g1h->set_par_threads(0);
2305 }
2306
2307 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2308 if (has_overflown()) {
2309 // Skip processing the discovered references if we have
2310 // overflown the global marking stack. Reference objects
2311 // only get discovered once so it is OK to not
2312 // de-populate the discovered reference lists. We could have,
2313 // but the only benefit would be that, when marking restarts,
2314 // fewer reference objects are discovered.
2315 return;
2316 }
2317
2318 ResourceMark rm;
2319 HandleMark hm;
2320
2321 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2322
2323 // Is alive closure.
2324 G1CMIsAliveClosure g1_is_alive(g1h);
2325
2326 // Inner scope to exclude the cleaning of the string and symbol
2327 // tables from the displayed time.
2328 {
2329 if (G1Log::finer()) {
2330 gclog_or_tty->put(' ');
2331 }
2332 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2333
2334 ReferenceProcessor* rp = g1h->ref_processor_cm();
2335
2336 // See the comment in G1CollectedHeap::ref_processing_init()
2337 // about how reference processing currently works in G1.
2338
2339 // Set the soft reference policy
2340 rp->setup_policy(clear_all_soft_refs);
2341 assert(_markStack.isEmpty(), "mark stack should be empty");
2342
2343 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2344 // in serial reference processing. Note these closures are also
2345 // used for serially processing (by the current thread) the
2346 // JNI references during parallel reference processing.
2347 //
2348 // These closures do not need to synchronize with the worker
2349 // threads involved in parallel reference processing as these
2350 // instances are executed serially by the current thread (i.e.
2351 // reference processing is not multi-threaded and is thus
2352 // performed by the current thread instead of a gang worker).
2353 //
2354 // The gang tasks involved in parallel reference processing create
2355 // their own instances of these closures, which do their own
2356 // synchronization among themselves.
2357 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2358 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2359
2360 // We need at least one active thread. If reference processing
2361 // is not multi-threaded we use the current (VMThread) thread,
2362 // otherwise we use the work gang from the G1CollectedHeap and
2363 // we utilize all the worker threads we can.
2364 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2365 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2366 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2367
2368 // Parallel processing task executor.
2369 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2370 g1h->workers(), active_workers);
2371 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2372
2373 ReferenceProcessorStats stats;
2374
2375 // Set the concurrency level. The phase was already set prior to
2376 // executing the remark task.
2377 set_concurrency(active_workers);
2378
2379 // Set the degree of MT processing here. If the discovery was done MT,
2380 // the number of threads involved during discovery could differ from
2381 // the number of active workers. This is OK as long as the discovered
2382 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2383 rp->set_active_mt_degree(active_workers);
2384
2385 // Process the weak references.
2386 stats = rp->process_discovered_references(&g1_is_alive,
2387 &g1_keep_alive,
2388 &g1_drain_mark_stack,
2389 executor,
2390 g1h->gc_timer_cm());
2391
2392 // The do_oop work routines of the keep_alive and drain_marking_stack
2393 // oop closures will set the has_overflown flag if we overflow the
2394 // global marking stack.
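// (When that flag does get set, remark is effectively abandoned and
// concurrent marking is redone - see the CMRemarkTask comment below -
// which is why it is acceptable to leave the discovered reference
// lists only partially processed in the has_overflown() early return
// above.)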
2395
2396 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2397
2398 assert(_markStack.overflow() || _markStack.isEmpty(),
2399 "mark stack should be empty (unless it overflowed)");
2400
2401 if (_markStack.overflow()) {
2402 // This should have been done already when we tried to push an
2403 // entry on to the global mark stack. But let's do it again.
2404 set_has_overflown();
2405 }
2406
2407 assert(rp->num_q() == active_workers, "why not");
2408
2409 rp->enqueue_discovered_references(executor);
2410
2411 rp->verify_no_references_recorded();
2412 assert(!rp->discovery_enabled(), "Post condition");
2413 }
2414
2415 // Now clean up stale oops in StringTable
2416 StringTable::unlink(&g1_is_alive);
2417 // Clean up unreferenced symbols in symbol table.
2418 SymbolTable::unlink();
2419 }
2420
2421 void ConcurrentMark::swapMarkBitMaps() {
2422 CMBitMapRO* temp = _prevMarkBitMap;
2423 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2424 _nextMarkBitMap = (CMBitMap*) temp;
2425 }
2426
2427 class CMRemarkTask: public AbstractGangTask {
2428 private:
2429 ConcurrentMark* _cm;
2430 bool _is_serial;
2431 public:
2432 void work(uint worker_id) {
2433 // Since all available tasks are actually started, we should
2434 // only proceed if we're supposed to be active.
2435 if (worker_id < _cm->active_tasks()) {
2436 CMTask* task = _cm->task(worker_id);
2437 task->record_start_time();
2438 do {
2439 task->do_marking_step(1000000000.0 /* something very large */,
2440 true /* do_termination */,
2441 _is_serial);
2442 } while (task->has_aborted() && !_cm->has_overflown());
2443 // If we overflow, then we do not want to restart. We instead
2444 // want to abort remark and do concurrent marking again.
2445 task->record_end_time();
2446 }
2447 }
2448
2449 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2450 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2451 _cm->terminator()->reset_for_reuse(active_workers);
2452 }
2453 };
2454
2455 void ConcurrentMark::checkpointRootsFinalWork() {
2456 ResourceMark rm;
2457 HandleMark hm;
2458 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2459
2460 g1h->ensure_parsability(false);
2461
2462 if (G1CollectedHeap::use_parallel_gc_threads()) {
2463 G1CollectedHeap::StrongRootsScope srs(g1h);
2464 // this is remark, so we'll use up all active threads
2465 uint active_workers = g1h->workers()->active_workers();
2466 if (active_workers == 0) {
2467 assert(active_workers > 0, "Should have been set earlier");
2468 active_workers = (uint) ParallelGCThreads;
2469 g1h->workers()->set_active_workers(active_workers);
2470 }
2471 set_concurrency_and_phase(active_workers, false /* concurrent */);
2472 // Leave _parallel_marking_threads at its
2473 // value originally calculated in the ConcurrentMark
2474 // constructor and pass values of the active workers
2475 // through the gang in the task.
2476
2477 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2478 // We will start all available threads, even if we decide that the
2479 // active_workers will be fewer. The extra ones will just bail out
2480 // immediately.
2481 g1h->set_par_threads(active_workers);
2482 g1h->workers()->run_task(&remarkTask);
2483 g1h->set_par_threads(0);
2484 } else {
2485 G1CollectedHeap::StrongRootsScope srs(g1h);
2486 uint active_workers = 1;
2487 set_concurrency_and_phase(active_workers, false /* concurrent */);
2488
2489 // Note - if there's no work gang then the VMThread will be
2490 // the thread to execute the remark - serially.
We have 2491 // to pass true for the is_serial parameter so that 2492 // CMTask::do_marking_step() doesn't enter the sync 2493 // barriers in the event of an overflow. Doing so will 2494 // cause an assert that the current thread is not a 2495 // concurrent GC thread. 2496 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2497 remarkTask.work(0); 2498 } 2499 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2500 guarantee(has_overflown() || 2501 satb_mq_set.completed_buffers_num() == 0, 2502 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2503 BOOL_TO_STR(has_overflown()), 2504 satb_mq_set.completed_buffers_num())); 2505 2506 print_stats(); 2507 2508 #if VERIFY_OBJS_PROCESSED 2509 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2510 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2511 _scan_obj_cl.objs_processed, 2512 ThreadLocalObjQueue::objs_enqueued); 2513 guarantee(_scan_obj_cl.objs_processed == 2514 ThreadLocalObjQueue::objs_enqueued, 2515 "Different number of objs processed and enqueued."); 2516 } 2517 #endif 2518 } 2519 2520 #ifndef PRODUCT 2521 2522 class PrintReachableOopClosure: public OopClosure { 2523 private: 2524 G1CollectedHeap* _g1h; 2525 outputStream* _out; 2526 VerifyOption _vo; 2527 bool _all; 2528 2529 public: 2530 PrintReachableOopClosure(outputStream* out, 2531 VerifyOption vo, 2532 bool all) : 2533 _g1h(G1CollectedHeap::heap()), 2534 _out(out), _vo(vo), _all(all) { } 2535 2536 void do_oop(narrowOop* p) { do_oop_work(p); } 2537 void do_oop( oop* p) { do_oop_work(p); } 2538 2539 template <class T> void do_oop_work(T* p) { 2540 oop obj = oopDesc::load_decode_heap_oop(p); 2541 const char* str = NULL; 2542 const char* str2 = ""; 2543 2544 if (obj == NULL) { 2545 str = ""; 2546 } else if (!_g1h->is_in_g1_reserved(obj)) { 2547 str = " O"; 2548 } else { 2549 HeapRegion* hr = _g1h->heap_region_containing(obj); 2550 guarantee(hr != NULL, "invariant"); 2551 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2552 bool marked = _g1h->is_marked(obj, _vo); 2553 2554 if (over_tams) { 2555 str = " >"; 2556 if (marked) { 2557 str2 = " AND MARKED"; 2558 } 2559 } else if (marked) { 2560 str = " M"; 2561 } else { 2562 str = " NOT"; 2563 } 2564 } 2565 2566 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2567 p, (void*) obj, str, str2); 2568 } 2569 }; 2570 2571 class PrintReachableObjectClosure : public ObjectClosure { 2572 private: 2573 G1CollectedHeap* _g1h; 2574 outputStream* _out; 2575 VerifyOption _vo; 2576 bool _all; 2577 HeapRegion* _hr; 2578 2579 public: 2580 PrintReachableObjectClosure(outputStream* out, 2581 VerifyOption vo, 2582 bool all, 2583 HeapRegion* hr) : 2584 _g1h(G1CollectedHeap::heap()), 2585 _out(out), _vo(vo), _all(all), _hr(hr) { } 2586 2587 void do_object(oop o) { 2588 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2589 bool marked = _g1h->is_marked(o, _vo); 2590 bool print_it = _all || over_tams || marked; 2591 2592 if (print_it) { 2593 _out->print_cr(" "PTR_FORMAT"%s", 2594 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2595 PrintReachableOopClosure oopCl(_out, _vo, _all); 2596 o->oop_iterate(&oopCl); 2597 } 2598 } 2599 }; 2600 2601 class PrintReachableRegionClosure : public HeapRegionClosure { 2602 private: 2603 G1CollectedHeap* _g1h; 2604 outputStream* _out; 2605 VerifyOption _vo; 2606 bool _all; 2607 2608 public: 2609 bool doHeapRegion(HeapRegion* hr) { 2610 HeapWord* b = hr->bottom(); 2611 HeapWord* e = hr->end(); 2612 HeapWord* t = hr->top(); 2613 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2614 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2615 "TAMS: "PTR_FORMAT, b, e, t, p); 2616 _out->cr(); 2617 2618 HeapWord* from = b; 2619 HeapWord* to = t; 2620 2621 if (to > from) { 2622 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2623 _out->cr(); 2624 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2625 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2626 _out->cr(); 2627 } 2628 2629 return false; 2630 } 2631 2632 PrintReachableRegionClosure(outputStream* out, 2633 VerifyOption vo, 2634 bool all) : 2635 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2636 }; 2637 2638 void ConcurrentMark::print_reachable(const char* str, 2639 VerifyOption vo, 2640 bool all) { 2641 gclog_or_tty->cr(); 2642 gclog_or_tty->print_cr("== Doing heap dump... "); 2643 2644 if (G1PrintReachableBaseFile == NULL) { 2645 gclog_or_tty->print_cr(" #### error: no base file defined"); 2646 return; 2647 } 2648 2649 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2650 (JVM_MAXPATHLEN - 1)) { 2651 gclog_or_tty->print_cr(" #### error: file name too long"); 2652 return; 2653 } 2654 2655 char file_name[JVM_MAXPATHLEN]; 2656 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2657 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2658 2659 fileStream fout(file_name); 2660 if (!fout.is_open()) { 2661 gclog_or_tty->print_cr(" #### error: could not open file"); 2662 return; 2663 } 2664 2665 outputStream* out = &fout; 2666 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2667 out->cr(); 2668 2669 out->print_cr("--- ITERATING OVER REGIONS"); 2670 out->cr(); 2671 PrintReachableRegionClosure rcl(out, vo, all); 2672 _g1h->heap_region_iterate(&rcl); 2673 out->cr(); 2674 2675 gclog_or_tty->print_cr(" done"); 2676 gclog_or_tty->flush(); 2677 } 2678 2679 #endif // PRODUCT 2680 2681 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2682 // Note we are overriding the read-only view of the prev map here, via 2683 // the cast. 2684 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2685 } 2686 2687 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2688 _nextMarkBitMap->clearRange(mr); 2689 } 2690 2691 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2692 clearRangePrevBitmap(mr); 2693 clearRangeNextBitmap(mr); 2694 } 2695 2696 HeapRegion* 2697 ConcurrentMark::claim_region(int task_num) { 2698 // "checkpoint" the finger 2699 HeapWord* finger = _finger; 2700 2701 // _heap_end will not change underneath our feet; it only changes at 2702 // yield points. 2703 while (finger < _heap_end) { 2704 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2705 2706 // Note on how this code handles humongous regions. In the 2707 // normal case the finger will reach the start of a "starts 2708 // humongous" (SH) region. Its end will either be the end of the 2709 // last "continues humongous" (CH) region in the sequence, or the 2710 // standard end of the SH region (if the SH is the only region in 2711 // the sequence). 
That way claim_region() will skip over the CH
2712 // regions. However, there is a subtle race between a CM thread
2713 // executing this method and a mutator thread doing a humongous
2714 // object allocation. The two are not mutually exclusive as the CM
2715 // thread does not need to hold the Heap_lock when it gets
2716 // here. So there is a chance that claim_region() will come across
2717 // a free region that's in the process of becoming a SH or a CH
2718 // region. In the former case, it will either
2719 // a) Miss the update to the region's end, in which case it will
2720 // visit every subsequent CH region, will find their bitmaps
2721 // empty, and do nothing, or
2722 // b) Will observe the update of the region's end (in which case
2723 // it will skip the subsequent CH regions).
2724 // If it comes across a region that suddenly becomes CH, the
2725 // scenario will be similar to b). So, the race between
2726 // claim_region() and a humongous object allocation might force us
2727 // to do a bit of unnecessary work (due to some unnecessary bitmap
2728 // iterations) but it should not introduce any correctness issues.
2729 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2730 HeapWord* bottom = curr_region->bottom();
2731 HeapWord* end = curr_region->end();
2732 HeapWord* limit = curr_region->next_top_at_mark_start();
2733
2734 if (verbose_low()) {
2735 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2736 "["PTR_FORMAT", "PTR_FORMAT"), "
2737 "limit = "PTR_FORMAT,
2738 task_num, curr_region, bottom, end, limit);
2739 }
2740
2741 // Is the gap between reading the finger and doing the CAS too long?
2742 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2743 if (res == finger) {
2744 // we succeeded
2745
2746 // notice that _finger == end cannot be guaranteed here since
2747 // someone else might have moved the finger even further
2748 assert(_finger >= end, "the finger should have moved forward");
2749
2750 if (verbose_low()) {
2751 gclog_or_tty->print_cr("[%d] we were successful with region = "
2752 PTR_FORMAT, task_num, curr_region);
2753 }
2754
2755 if (limit > bottom) {
2756 if (verbose_low()) {
2757 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2758 "returning it ", task_num, curr_region);
2759 }
2760 return curr_region;
2761 } else {
2762 assert(limit == bottom,
2763 "the region limit should be at bottom");
2764 if (verbose_low()) {
2765 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2766 "returning NULL", task_num, curr_region);
2767 }
2768 // we return NULL and the caller should try calling
2769 // claim_region() again.
2770 return NULL; 2771 } 2772 } else { 2773 assert(_finger > finger, "the finger should have moved forward"); 2774 if (verbose_low()) { 2775 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2776 "global finger = "PTR_FORMAT", " 2777 "our finger = "PTR_FORMAT, 2778 task_num, _finger, finger); 2779 } 2780 2781 // read it again 2782 finger = _finger; 2783 } 2784 } 2785 2786 return NULL; 2787 } 2788 2789 #ifndef PRODUCT 2790 enum VerifyNoCSetOopsPhase { 2791 VerifyNoCSetOopsStack, 2792 VerifyNoCSetOopsQueues, 2793 VerifyNoCSetOopsSATBCompleted, 2794 VerifyNoCSetOopsSATBThread 2795 }; 2796 2797 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2798 private: 2799 G1CollectedHeap* _g1h; 2800 VerifyNoCSetOopsPhase _phase; 2801 int _info; 2802 2803 const char* phase_str() { 2804 switch (_phase) { 2805 case VerifyNoCSetOopsStack: return "Stack"; 2806 case VerifyNoCSetOopsQueues: return "Queue"; 2807 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2808 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2809 default: ShouldNotReachHere(); 2810 } 2811 return NULL; 2812 } 2813 2814 void do_object_work(oop obj) { 2815 guarantee(!_g1h->obj_in_cs(obj), 2816 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2817 (void*) obj, phase_str(), _info)); 2818 } 2819 2820 public: 2821 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2822 2823 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2824 _phase = phase; 2825 _info = info; 2826 } 2827 2828 virtual void do_oop(oop* p) { 2829 oop obj = oopDesc::load_decode_heap_oop(p); 2830 do_object_work(obj); 2831 } 2832 2833 virtual void do_oop(narrowOop* p) { 2834 // We should not come across narrow oops while scanning marking 2835 // stacks and SATB buffers. 2836 ShouldNotReachHere(); 2837 } 2838 2839 virtual void do_object(oop obj) { 2840 do_object_work(obj); 2841 } 2842 }; 2843 2844 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2845 bool verify_enqueued_buffers, 2846 bool verify_thread_buffers, 2847 bool verify_fingers) { 2848 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2849 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2850 return; 2851 } 2852 2853 VerifyNoCSetOopsClosure cl; 2854 2855 if (verify_stacks) { 2856 // Verify entries on the global mark stack 2857 cl.set_phase(VerifyNoCSetOopsStack); 2858 _markStack.oops_do(&cl); 2859 2860 // Verify entries on the task queues 2861 for (int i = 0; i < (int) _max_task_num; i += 1) { 2862 cl.set_phase(VerifyNoCSetOopsQueues, i); 2863 OopTaskQueue* queue = _task_queues->queue(i); 2864 queue->oops_do(&cl); 2865 } 2866 } 2867 2868 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2869 2870 // Verify entries on the enqueued SATB buffers 2871 if (verify_enqueued_buffers) { 2872 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2873 satb_qs.iterate_completed_buffers_read_only(&cl); 2874 } 2875 2876 // Verify entries on the per-thread SATB buffers 2877 if (verify_thread_buffers) { 2878 cl.set_phase(VerifyNoCSetOopsSATBThread); 2879 satb_qs.iterate_thread_buffers_read_only(&cl); 2880 } 2881 2882 if (verify_fingers) { 2883 // Verify the global finger 2884 HeapWord* global_finger = finger(); 2885 if (global_finger != NULL && global_finger < _heap_end) { 2886 // The global finger always points to a heap region boundary. 
We 2887 // use heap_region_containing_raw() to get the containing region 2888 // given that the global finger could be pointing to a free region 2889 // which subsequently becomes continues humongous. If that 2890 // happens, heap_region_containing() will return the bottom of the 2891 // corresponding starts humongous region and the check below will 2892 // not hold any more. 2893 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2894 guarantee(global_finger == global_hr->bottom(), 2895 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2896 global_finger, HR_FORMAT_PARAMS(global_hr))); 2897 } 2898 2899 // Verify the task fingers 2900 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2901 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2902 CMTask* task = _tasks[i]; 2903 HeapWord* task_finger = task->finger(); 2904 if (task_finger != NULL && task_finger < _heap_end) { 2905 // See above note on the global finger verification. 2906 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2907 guarantee(task_finger == task_hr->bottom() || 2908 !task_hr->in_collection_set(), 2909 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2910 task_finger, HR_FORMAT_PARAMS(task_hr))); 2911 } 2912 } 2913 } 2914 } 2915 #endif // PRODUCT 2916 2917 // Aggregate the counting data that was constructed concurrently 2918 // with marking. 2919 class AggregateCountDataHRClosure: public HeapRegionClosure { 2920 G1CollectedHeap* _g1h; 2921 ConcurrentMark* _cm; 2922 CardTableModRefBS* _ct_bs; 2923 BitMap* _cm_card_bm; 2924 size_t _max_task_num; 2925 2926 public: 2927 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2928 BitMap* cm_card_bm, 2929 size_t max_task_num) : 2930 _g1h(g1h), _cm(g1h->concurrent_mark()), 2931 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 2932 _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { } 2933 2934 bool doHeapRegion(HeapRegion* hr) { 2935 if (hr->continuesHumongous()) { 2936 // We will ignore these here and process them when their 2937 // associated "starts humongous" region is processed. 2938 // Note that we cannot rely on their associated 2939 // "starts humongous" region to have their bit set to 1 2940 // since, due to the region chunking in the parallel region 2941 // iteration, a "continues humongous" region might be visited 2942 // before its associated "starts humongous". 2943 return false; 2944 } 2945 2946 HeapWord* start = hr->bottom(); 2947 HeapWord* limit = hr->next_top_at_mark_start(); 2948 HeapWord* end = hr->end(); 2949 2950 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2951 err_msg("Preconditions not met - " 2952 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2953 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2954 start, limit, hr->top(), hr->end())); 2955 2956 assert(hr->next_marked_bytes() == 0, "Precondition"); 2957 2958 if (start == limit) { 2959 // NTAMS of this region has not been set so nothing to do. 2960 return false; 2961 } 2962 2963 // 'start' should be in the heap. 
2964 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2965 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2966 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2967
2968 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2969 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2970 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2971
2972 // If ntams is not card aligned then we bump the card bitmap index
2973 // for limit so that we get all the cards spanned by
2974 // the object ending at ntams.
2975 // Note: if this is the last region in the heap then ntams
2976 // could actually be just beyond the end of the heap;
2977 // limit_idx will then correspond to a (non-existent) card
2978 // that is also outside the heap.
2979 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2980 limit_idx += 1;
2981 }
2982
2983 assert(limit_idx <= end_idx, "or else use atomics");
2984
2985 // Aggregate the "stripe" in the count data associated with hr.
2986 uint hrs_index = hr->hrs_index();
2987 size_t marked_bytes = 0;
2988
2989 for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2990 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2991 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2992
2993 // Fetch the marked_bytes in this region for task i and
2994 // add it to the running total for this region.
2995 marked_bytes += marked_bytes_array[hrs_index];
2996
2997 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2998 // into the global card bitmap.
2999 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3000
3001 while (scan_idx < limit_idx) {
3002 assert(task_card_bm->at(scan_idx) == true, "should be");
3003 _cm_card_bm->set_bit(scan_idx);
3004 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3005
3006 // BitMap::get_next_one_offset() can handle the case when
3007 // its left_offset parameter is greater than its right_offset
3008 // parameter. It does, however, have an early exit if
3009 // left_offset == right_offset. So let's limit the value
3010 // passed in for left offset here.
3011 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3012 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3013 }
3014 }
3015
3016 // Update the marked bytes for this region.
3017 hr->add_to_marked_bytes(marked_bytes); 3018 3019 // Next heap region 3020 return false; 3021 } 3022 }; 3023 3024 class G1AggregateCountDataTask: public AbstractGangTask { 3025 protected: 3026 G1CollectedHeap* _g1h; 3027 ConcurrentMark* _cm; 3028 BitMap* _cm_card_bm; 3029 size_t _max_task_num; 3030 int _active_workers; 3031 3032 public: 3033 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3034 ConcurrentMark* cm, 3035 BitMap* cm_card_bm, 3036 size_t max_task_num, 3037 int n_workers) : 3038 AbstractGangTask("Count Aggregation"), 3039 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3040 _max_task_num(max_task_num), 3041 _active_workers(n_workers) { } 3042 3043 void work(uint worker_id) { 3044 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num); 3045 3046 if (G1CollectedHeap::use_parallel_gc_threads()) { 3047 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3048 _active_workers, 3049 HeapRegion::AggregateCountClaimValue); 3050 } else { 3051 _g1h->heap_region_iterate(&cl); 3052 } 3053 } 3054 }; 3055 3056 3057 void ConcurrentMark::aggregate_count_data() { 3058 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3059 _g1h->workers()->active_workers() : 3060 1); 3061 3062 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3063 _max_task_num, n_workers); 3064 3065 if (G1CollectedHeap::use_parallel_gc_threads()) { 3066 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3067 "sanity check"); 3068 _g1h->set_par_threads(n_workers); 3069 _g1h->workers()->run_task(&g1_par_agg_task); 3070 _g1h->set_par_threads(0); 3071 3072 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3073 "sanity check"); 3074 _g1h->reset_heap_region_claim_values(); 3075 } else { 3076 g1_par_agg_task.work(0); 3077 } 3078 } 3079 3080 // Clear the per-worker arrays used to store the per-region counting data 3081 void ConcurrentMark::clear_all_count_data() { 3082 // Clear the global card bitmap - it will be filled during 3083 // liveness count aggregation (during remark) and the 3084 // final counting task. 3085 _card_bm.clear(); 3086 3087 // Clear the global region bitmap - it will be filled as part 3088 // of the final counting task. 
3089 _region_bm.clear();
3090
3091 uint max_regions = _g1h->max_regions();
3092 assert(_max_task_num != 0, "uninitialized");
3093
3094 for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3095 BitMap* task_card_bm = count_card_bitmap_for(i);
3096 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3097
3098 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3099 assert(marked_bytes_array != NULL, "uninitialized");
3100
3101 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3102 task_card_bm->clear();
3103 }
3104 }
3105
3106 void ConcurrentMark::print_stats() {
3107 if (verbose_stats()) {
3108 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3109 for (size_t i = 0; i < _active_tasks; ++i) {
3110 _tasks[i]->print_stats();
3111 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3112 }
3113 }
3114 }
3115
3116 // abandon current marking iteration due to a Full GC
3117 void ConcurrentMark::abort() {
3118 // Clear all marks to force marking thread to do nothing
3119 _nextMarkBitMap->clearAll();
3120 // Clear the liveness counting data
3121 clear_all_count_data();
3122 // Empty mark stack
3123 reset_marking_state();
3124 for (int i = 0; i < (int)_max_task_num; ++i) {
3125 _tasks[i]->clear_region_fields();
3126 }
3127 _has_aborted = true;
3128
3129 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3130 satb_mq_set.abandon_partial_marking();
3131 // This can be called either during or outside marking; we'll read
3132 // the expected_active value from the SATB queue set.
3133 satb_mq_set.set_active_all_threads(
3134 false, /* new active value */
3135 satb_mq_set.is_active() /* expected_active */);
3136
3137 _g1h->trace_heap_after_concurrent_cycle();
3138 _g1h->register_concurrent_cycle_end();
3139 }
3140
3141 static void print_ms_time_info(const char* prefix, const char* name,
3142 NumberSeq& ns) {
3143 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3144 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3145 if (ns.num() > 0) {
3146 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3147 prefix, ns.sd(), ns.maximum());
3148 }
3149 }
3150
3151 void ConcurrentMark::print_summary_info() {
3152 gclog_or_tty->print_cr(" Concurrent marking:");
3153 print_ms_time_info(" ", "init marks", _init_times);
3154 print_ms_time_info(" ", "remarks", _remark_times);
3155 {
3156 print_ms_time_info(" ", "final marks", _remark_mark_times);
3157 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3158
3159 }
3160 print_ms_time_info(" ", "cleanups", _cleanup_times);
3161 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3162 _total_counting_time,
3163 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3164 (double)_cleanup_times.num()
3165 : 0.0));
3166 if (G1ScrubRemSets) {
3167 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3168 _total_rs_scrub_time,
3169 (_cleanup_times.num() > 0 ?
_total_rs_scrub_time * 1000.0 / 3170 (double)_cleanup_times.num() 3171 : 0.0)); 3172 } 3173 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3174 (_init_times.sum() + _remark_times.sum() + 3175 _cleanup_times.sum())/1000.0); 3176 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3177 "(%8.2f s marking).", 3178 cmThread()->vtime_accum(), 3179 cmThread()->vtime_mark_accum()); 3180 } 3181 3182 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3183 if (use_parallel_marking_threads()) { 3184 _parallel_workers->print_worker_threads_on(st); 3185 } 3186 } 3187 3188 // We take a break if someone is trying to stop the world. 3189 bool ConcurrentMark::do_yield_check(uint worker_id) { 3190 if (should_yield()) { 3191 if (worker_id == 0) { 3192 _g1h->g1_policy()->record_concurrent_pause(); 3193 } 3194 cmThread()->yield(); 3195 return true; 3196 } else { 3197 return false; 3198 } 3199 } 3200 3201 bool ConcurrentMark::should_yield() { 3202 return cmThread()->should_yield(); 3203 } 3204 3205 bool ConcurrentMark::containing_card_is_marked(void* p) { 3206 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3207 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3208 } 3209 3210 bool ConcurrentMark::containing_cards_are_marked(void* start, 3211 void* last) { 3212 return containing_card_is_marked(start) && 3213 containing_card_is_marked(last); 3214 } 3215 3216 #ifndef PRODUCT 3217 // for debugging purposes 3218 void ConcurrentMark::print_finger() { 3219 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3220 _heap_start, _heap_end, _finger); 3221 for (int i = 0; i < (int) _max_task_num; ++i) { 3222 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3223 } 3224 gclog_or_tty->print_cr(""); 3225 } 3226 #endif 3227 3228 void CMTask::scan_object(oop obj) { 3229 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3230 3231 if (_cm->verbose_high()) { 3232 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3233 _task_id, (void*) obj); 3234 } 3235 3236 size_t obj_size = obj->size(); 3237 _words_scanned += obj_size; 3238 3239 obj->oop_iterate(_cm_oop_closure); 3240 statsOnly( ++_objs_scanned ); 3241 check_limits(); 3242 } 3243 3244 // Closure for iteration over bitmaps 3245 class CMBitMapClosure : public BitMapClosure { 3246 private: 3247 // the bitmap that is being iterated over 3248 CMBitMap* _nextMarkBitMap; 3249 ConcurrentMark* _cm; 3250 CMTask* _task; 3251 3252 public: 3253 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3254 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3255 3256 bool do_bit(size_t offset) { 3257 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3258 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3259 assert( addr < _cm->finger(), "invariant"); 3260 3261 statsOnly( _task->increase_objs_found_on_bitmap() ); 3262 assert(addr >= _task->finger(), "invariant"); 3263 3264 // We move that task's local finger along. 3265 _task->move_finger_to(addr); 3266 3267 _task->scan_object(oop(addr)); 3268 // we only partially drain the local queue and global stack 3269 _task->drain_local_queue(true); 3270 _task->drain_global_stack(true); 3271 3272 // if the has_aborted flag has been raised, we need to bail out of 3273 // the iteration 3274 return !_task->has_aborted(); 3275 } 3276 }; 3277 3278 // Closure for iterating over objects, currently only used for 3279 // processing SATB buffers. 
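// A minimal usage sketch; the calls on the SATB queue set below are
// assumptions about this vintage of the API rather than a quote of
// the actual draining loop:
//
//   CMObjectClosure oc(task);
//   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
//   satb_mq_set.set_closure(&oc);
//   while (satb_mq_set.apply_closure_to_completed_buffer()) {
//     // every object recorded in a completed SATB buffer is passed
//     // to oc.do_object(), i.e. on to CMTask::deal_with_reference()
//   }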
3280 class CMObjectClosure : public ObjectClosure { 3281 private: 3282 CMTask* _task; 3283 3284 public: 3285 void do_object(oop obj) { 3286 _task->deal_with_reference(obj); 3287 } 3288 3289 CMObjectClosure(CMTask* task) : _task(task) { } 3290 }; 3291 3292 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3293 ConcurrentMark* cm, 3294 CMTask* task) 3295 : _g1h(g1h), _cm(cm), _task(task) { 3296 assert(_ref_processor == NULL, "should be initialized to NULL"); 3297 3298 if (G1UseConcMarkReferenceProcessing) { 3299 _ref_processor = g1h->ref_processor_cm(); 3300 assert(_ref_processor != NULL, "should not be NULL"); 3301 } 3302 } 3303 3304 void CMTask::setup_for_region(HeapRegion* hr) { 3305 // Separated the asserts so that we know which one fires. 3306 assert(hr != NULL, 3307 "claim_region() should have filtered out continues humongous regions"); 3308 assert(!hr->continuesHumongous(), 3309 "claim_region() should have filtered out continues humongous regions"); 3310 3311 if (_cm->verbose_low()) { 3312 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 3313 _task_id, hr); 3314 } 3315 3316 _curr_region = hr; 3317 _finger = hr->bottom(); 3318 update_region_limit(); 3319 } 3320 3321 void CMTask::update_region_limit() { 3322 HeapRegion* hr = _curr_region; 3323 HeapWord* bottom = hr->bottom(); 3324 HeapWord* limit = hr->next_top_at_mark_start(); 3325 3326 if (limit == bottom) { 3327 if (_cm->verbose_low()) { 3328 gclog_or_tty->print_cr("[%d] found an empty region " 3329 "["PTR_FORMAT", "PTR_FORMAT")", 3330 _task_id, bottom, limit); 3331 } 3332 // The region was collected underneath our feet. 3333 // We set the finger to bottom to ensure that the bitmap 3334 // iteration that will follow this will not do anything. 3335 // (this is not a condition that holds when we set the region up, 3336 // as the region is not supposed to be empty in the first place) 3337 _finger = bottom; 3338 } else if (limit >= _region_limit) { 3339 assert(limit >= _finger, "peace of mind"); 3340 } else { 3341 assert(limit < _region_limit, "only way to get here"); 3342 // This can happen under some pretty unusual circumstances. An 3343 // evacuation pause empties the region underneath our feet (NTAMS 3344 // at bottom). We then do some allocation in the region (NTAMS 3345 // stays at bottom), followed by the region being used as a GC 3346 // alloc region (NTAMS will move to top() and the objects 3347 // originally below it will be grayed). All objects now marked in 3348 // the region are explicitly grayed, if below the global finger, 3349 // and in fact we do not need to scan anything else. So, we simply 3350 // set _finger to be limit to ensure that the bitmap iteration 3351 // doesn't do anything. 3352 _finger = limit; 3353 } 3354 3355 _region_limit = limit; 3356 } 3357 3358 void CMTask::giveup_current_region() { 3359 assert(_curr_region != NULL, "invariant"); 3360 if (_cm->verbose_low()) { 3361 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 3362 _task_id, _curr_region); 3363 } 3364 clear_region_fields(); 3365 } 3366 3367 void CMTask::clear_region_fields() { 3368 // Values for these three fields that indicate that we're not 3369 // holding on to a region.
3370 _curr_region = NULL; 3371 _finger = NULL; 3372 _region_limit = NULL; 3373 } 3374 3375 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3376 if (cm_oop_closure == NULL) { 3377 assert(_cm_oop_closure != NULL, "invariant"); 3378 } else { 3379 assert(_cm_oop_closure == NULL, "invariant"); 3380 } 3381 _cm_oop_closure = cm_oop_closure; 3382 } 3383 3384 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3385 guarantee(nextMarkBitMap != NULL, "invariant"); 3386 3387 if (_cm->verbose_low()) { 3388 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3389 } 3390 3391 _nextMarkBitMap = nextMarkBitMap; 3392 clear_region_fields(); 3393 3394 _calls = 0; 3395 _elapsed_time_ms = 0.0; 3396 _termination_time_ms = 0.0; 3397 _termination_start_time_ms = 0.0; 3398 3399 #if _MARKING_STATS_ 3400 _local_pushes = 0; 3401 _local_pops = 0; 3402 _local_max_size = 0; 3403 _objs_scanned = 0; 3404 _global_pushes = 0; 3405 _global_pops = 0; 3406 _global_max_size = 0; 3407 _global_transfers_to = 0; 3408 _global_transfers_from = 0; 3409 _regions_claimed = 0; 3410 _objs_found_on_bitmap = 0; 3411 _satb_buffers_processed = 0; 3412 _steal_attempts = 0; 3413 _steals = 0; 3414 _aborted = 0; 3415 _aborted_overflow = 0; 3416 _aborted_cm_aborted = 0; 3417 _aborted_yield = 0; 3418 _aborted_timed_out = 0; 3419 _aborted_satb = 0; 3420 _aborted_termination = 0; 3421 #endif // _MARKING_STATS_ 3422 } 3423 3424 bool CMTask::should_exit_termination() { 3425 regular_clock_call(); 3426 // This is called when we are in the termination protocol. We should 3427 // quit if, for some reason, this task wants to abort or the global 3428 // stack is not empty (this means that we can get work from it). 3429 return !_cm->mark_stack_empty() || has_aborted(); 3430 } 3431 3432 void CMTask::reached_limit() { 3433 assert(_words_scanned >= _words_scanned_limit || 3434 _refs_reached >= _refs_reached_limit , 3435 "shouldn't have been called otherwise"); 3436 regular_clock_call(); 3437 } 3438 3439 void CMTask::regular_clock_call() { 3440 if (has_aborted()) return; 3441 3442 // First, we need to recalculate the words scanned and refs reached 3443 // limits for the next clock call. 3444 recalculate_limits(); 3445 3446 // During the regular clock call we do the following 3447 3448 // (1) If an overflow has been flagged, then we abort. 3449 if (_cm->has_overflown()) { 3450 set_has_aborted(); 3451 return; 3452 } 3453 3454 // If we are not concurrent (i.e. we're doing remark) we don't need 3455 // to check anything else. The other steps are only needed during 3456 // the concurrent marking phase. 3457 if (!concurrent()) return; 3458 3459 // (2) If marking has been aborted for Full GC, then we also abort. 3460 if (_cm->has_aborted()) { 3461 set_has_aborted(); 3462 statsOnly( ++_aborted_cm_aborted ); 3463 return; 3464 } 3465 3466 double curr_time_ms = os::elapsedVTime() * 1000.0; 3467 3468 // (3) If marking stats are enabled, then we update the step history. 
3469 #if _MARKING_STATS_ 3470 if (_words_scanned >= _words_scanned_limit) { 3471 ++_clock_due_to_scanning; 3472 } 3473 if (_refs_reached >= _refs_reached_limit) { 3474 ++_clock_due_to_marking; 3475 } 3476 3477 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 3478 _interval_start_time_ms = curr_time_ms; 3479 _all_clock_intervals_ms.add(last_interval_ms); 3480 3481 if (_cm->verbose_medium()) { 3482 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 3483 "scanned = %d%s, refs reached = %d%s", 3484 _task_id, last_interval_ms, 3485 _words_scanned, 3486 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 3487 _refs_reached, 3488 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 3489 } 3490 #endif // _MARKING_STATS_ 3491 3492 // (4) We check whether we should yield. If we have to, then we abort. 3493 if (_cm->should_yield()) { 3494 // We should yield. To do this we abort the task. The caller is 3495 // responsible for yielding. 3496 set_has_aborted(); 3497 statsOnly( ++_aborted_yield ); 3498 return; 3499 } 3500 3501 // (5) We check whether we've reached our time quota. If we have, 3502 // then we abort. 3503 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3504 if (elapsed_time_ms > _time_target_ms) { 3505 set_has_aborted(); 3506 _has_timed_out = true; 3507 statsOnly( ++_aborted_timed_out ); 3508 return; 3509 } 3510 3511 // (6) Finally, we check whether there are enough completed SATB 3512 // buffers available for processing. If there are, we abort. 3513 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3514 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3515 if (_cm->verbose_low()) { 3516 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 3517 _task_id); 3518 } 3519 // we do need to process SATB buffers, so we'll abort and restart 3520 // the marking task to do so 3521 set_has_aborted(); 3522 statsOnly( ++_aborted_satb ); 3523 return; 3524 } 3525 } 3526 3527 void CMTask::recalculate_limits() { 3528 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3529 _words_scanned_limit = _real_words_scanned_limit; 3530 3531 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3532 _refs_reached_limit = _real_refs_reached_limit; 3533 } 3534 3535 void CMTask::decrease_limits() { 3536 // This is called when we believe that we're going to do an infrequent 3537 // operation which will increase the per-byte scanned cost (i.e. move 3538 // entries to/from the global stack). It basically tries to decrease the 3539 // scanning limit so that the clock is called earlier.
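// A worked example of the arithmetic below (derived from
// recalculate_limits() above, not additional behavior): since
// _real_words_scanned_limit == _words_scanned + words_scanned_period at
// the last recalculation, knocking off 3/4 of a period leaves roughly
//   _words_scanned_limit ~ _words_scanned + words_scanned_period / 4
// so the regular clock fires after about a quarter of the usual budget.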
3540 3541 if (_cm->verbose_medium()) { 3542 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 3543 } 3544 3545 _words_scanned_limit = _real_words_scanned_limit - 3546 3 * words_scanned_period / 4; 3547 _refs_reached_limit = _real_refs_reached_limit - 3548 3 * refs_reached_period / 4; 3549 } 3550 3551 void CMTask::move_entries_to_global_stack() { 3552 // local array where we'll store the entries that will be popped 3553 // from the local queue 3554 oop buffer[global_stack_transfer_size]; 3555 3556 int n = 0; 3557 oop obj; 3558 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3559 buffer[n] = obj; 3560 ++n; 3561 } 3562 3563 if (n > 0) { 3564 // we popped at least one entry from the local queue 3565 3566 statsOnly( ++_global_transfers_to; _local_pops += n ); 3567 3568 if (!_cm->mark_stack_push(buffer, n)) { 3569 if (_cm->verbose_low()) { 3570 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", 3571 _task_id); 3572 } 3573 set_has_aborted(); 3574 } else { 3575 // the transfer was successful 3576 3577 if (_cm->verbose_medium()) { 3578 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 3579 _task_id, n); 3580 } 3581 statsOnly( int tmp_size = _cm->mark_stack_size(); 3582 if (tmp_size > _global_max_size) { 3583 _global_max_size = tmp_size; 3584 } 3585 _global_pushes += n ); 3586 } 3587 } 3588 3589 // this operation was quite expensive, so decrease the limits 3590 decrease_limits(); 3591 } 3592 3593 void CMTask::get_entries_from_global_stack() { 3594 // local array where we'll store the entries that will be popped 3595 // from the global stack. 3596 oop buffer[global_stack_transfer_size]; 3597 int n; 3598 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3599 assert(n <= global_stack_transfer_size, 3600 "we should not pop more than the given limit"); 3601 if (n > 0) { 3602 // yes, we did actually pop at least one entry 3603 3604 statsOnly( ++_global_transfers_from; _global_pops += n ); 3605 if (_cm->verbose_medium()) { 3606 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 3607 _task_id, n); 3608 } 3609 for (int i = 0; i < n; ++i) { 3610 bool success = _task_queue->push(buffer[i]); 3611 // We only call this when the local queue is empty or under a 3612 // given target limit. So, we do not expect this push to fail. 3613 assert(success, "invariant"); 3614 } 3615 3616 statsOnly( int tmp_size = _task_queue->size(); 3617 if (tmp_size > _local_max_size) { 3618 _local_max_size = tmp_size; 3619 } 3620 _local_pushes += n ); 3621 } 3622 3623 // this operation was quite expensive, so decrease the limits 3624 decrease_limits(); 3625 } 3626 3627 void CMTask::drain_local_queue(bool partially) { 3628 if (has_aborted()) return; 3629 3630 // Decide what the target size is, depending on whether we're going to 3631 // drain it partially (so that other tasks can steal if they run out 3632 // of things to do) or totally (at the very end).
3633 size_t target_size; 3634 if (partially) { 3635 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3636 } else { 3637 target_size = 0; 3638 } 3639 3640 if (_task_queue->size() > target_size) { 3641 if (_cm->verbose_high()) { 3642 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", 3643 _task_id, target_size); 3644 } 3645 3646 oop obj; 3647 bool ret = _task_queue->pop_local(obj); 3648 while (ret) { 3649 statsOnly( ++_local_pops ); 3650 3651 if (_cm->verbose_high()) { 3652 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 3653 (void*) obj); 3654 } 3655 3656 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3657 assert(!_g1h->is_on_master_free_list( 3658 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3659 3660 scan_object(obj); 3661 3662 if (_task_queue->size() <= target_size || has_aborted()) { 3663 ret = false; 3664 } else { 3665 ret = _task_queue->pop_local(obj); 3666 } 3667 } 3668 3669 if (_cm->verbose_high()) { 3670 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 3671 _task_id, _task_queue->size()); 3672 } 3673 } 3674 } 3675 3676 void CMTask::drain_global_stack(bool partially) { 3677 if (has_aborted()) return; 3678 3679 // We have a policy to drain the local queue before we attempt to 3680 // drain the global stack. 3681 assert(partially || _task_queue->size() == 0, "invariant"); 3682 3683 // Decide what the target size is, depending on whether we're going to 3684 // drain it partially (so that other tasks can steal if they run out 3685 // of things to do) or totally (at the very end). Notice that, 3686 // because we move entries from the global stack in chunks or 3687 // because another task might be doing the same, we might in fact 3688 // drop below the target. But, this is not a problem. 3689 size_t target_size; 3690 if (partially) { 3691 target_size = _cm->partial_mark_stack_size_target(); 3692 } else { 3693 target_size = 0; 3694 } 3695 3696 if (_cm->mark_stack_size() > target_size) { 3697 if (_cm->verbose_low()) { 3698 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", 3699 _task_id, target_size); 3700 } 3701 3702 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3703 get_entries_from_global_stack(); 3704 drain_local_queue(partially); 3705 } 3706 3707 if (_cm->verbose_low()) { 3708 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 3709 _task_id, _cm->mark_stack_size()); 3710 } 3711 } 3712 } 3713 3714 // The SATB queue code makes several assumptions about whether to call the 3715 // par or non-par versions of the methods. This is why some of the code is 3716 // replicated. We should really get rid of the single-threaded version 3717 // of the code to simplify things. 3718 void CMTask::drain_satb_buffers() { 3719 if (has_aborted()) return; 3720 3721 // We set this so that the regular clock knows that we're in the 3722 // middle of draining buffers and doesn't set the abort flag when it 3723 // notices that SATB buffers are available for draining. It'd be 3724 // very counterproductive if it did that. :-) 3725 _draining_satb_buffers = true; 3726 3727 CMObjectClosure oc(this); 3728 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3729 if (G1CollectedHeap::use_parallel_gc_threads()) { 3730 satb_mq_set.set_par_closure(_task_id, &oc); 3731 } else { 3732 satb_mq_set.set_closure(&oc); 3733 } 3734 3735 // This keeps claiming and applying the closure to completed buffers 3736 // until we run out of buffers or we need to abort.
3737 if (G1CollectedHeap::use_parallel_gc_threads()) { 3738 while (!has_aborted() && 3739 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3740 if (_cm->verbose_medium()) { 3741 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3742 } 3743 statsOnly( ++_satb_buffers_processed ); 3744 regular_clock_call(); 3745 } 3746 } else { 3747 while (!has_aborted() && 3748 satb_mq_set.apply_closure_to_completed_buffer()) { 3749 if (_cm->verbose_medium()) { 3750 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3751 } 3752 statsOnly( ++_satb_buffers_processed ); 3753 regular_clock_call(); 3754 } 3755 } 3756 3757 if (!concurrent() && !has_aborted()) { 3758 // We should only do this during remark. 3759 if (G1CollectedHeap::use_parallel_gc_threads()) { 3760 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3761 } else { 3762 satb_mq_set.iterate_closure_all_threads(); 3763 } 3764 } 3765 3766 _draining_satb_buffers = false; 3767 3768 assert(has_aborted() || 3769 concurrent() || 3770 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3771 3772 if (G1CollectedHeap::use_parallel_gc_threads()) { 3773 satb_mq_set.set_par_closure(_task_id, NULL); 3774 } else { 3775 satb_mq_set.set_closure(NULL); 3776 } 3777 3778 // again, this was a potentially expensive operation, decrease the 3779 // limits to get the regular clock call early 3780 decrease_limits(); 3781 } 3782 3783 void CMTask::print_stats() { 3784 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 3785 _task_id, _calls); 3786 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3787 _elapsed_time_ms, _termination_time_ms); 3788 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3789 _step_times_ms.num(), _step_times_ms.avg(), 3790 _step_times_ms.sd()); 3791 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3792 _step_times_ms.maximum(), _step_times_ms.sum()); 3793 3794 #if _MARKING_STATS_ 3795 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3796 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3797 _all_clock_intervals_ms.sd()); 3798 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3799 _all_clock_intervals_ms.maximum(), 3800 _all_clock_intervals_ms.sum()); 3801 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3802 _clock_due_to_scanning, _clock_due_to_marking); 3803 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3804 _objs_scanned, _objs_found_on_bitmap); 3805 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3806 _local_pushes, _local_pops, _local_max_size); 3807 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3808 _global_pushes, _global_pops, _global_max_size); 3809 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3810 _global_transfers_to,_global_transfers_from); 3811 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3812 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3813 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3814 _steal_attempts, _steals); 3815 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3816 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3817 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3818 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3819 _aborted_timed_out, _aborted_satb, 
_aborted_termination); 3820 #endif // _MARKING_STATS_ 3821 } 3822 3823 /***************************************************************************** 3824 3825 The do_marking_step(time_target_ms, ...) method is the building 3826 block of the parallel marking framework. It can be called in parallel 3827 with other invocations of do_marking_step() on different tasks 3828 (but only one per task, obviously) and concurrently with the 3829 mutator threads, or during remark, hence it eliminates the need 3830 for two versions of the code. When called during remark, it will 3831 pick up from where the task left off during the concurrent marking 3832 phase. Interestingly, tasks are also claimable during evacuation 3833 pauses, since do_marking_step() ensures that it aborts before 3834 it needs to yield. 3835 3836 The data structures that it uses to do marking work are the 3837 following: 3838 3839 (1) Marking Bitmap. If there are gray objects that appear only 3840 on the bitmap (this happens either when dealing with an overflow 3841 or when the initial marking phase has simply marked the roots 3842 and didn't push them on the stack), then tasks claim heap 3843 regions whose bitmap they then scan to find gray objects. A 3844 global finger indicates where the end of the last claimed region 3845 is. A local finger indicates how far into the region a task has 3846 scanned. The two fingers are used to determine how to gray an 3847 object (i.e. whether simply marking it is OK, as it will be 3848 visited by a task in the future, or whether it also needs to be 3849 pushed on a stack). 3850 3851 (2) Local Queue. Each task has a local queue, which it can 3852 access reasonably efficiently. Other tasks can steal from 3853 it when they run out of work. Throughout the marking phase, a 3854 task attempts to keep its local queue short but not totally 3855 empty, so that entries are available for stealing by other 3856 tasks. Only when there is no more work will a task totally 3857 drain its local queue. 3858 3859 (3) Global Mark Stack. This handles local queue overflow. During 3860 marking only batches of entries are moved between it and the local 3861 queues, as access to it requires a mutex and more fine-grained 3862 interaction with it might cause contention. If it 3863 overflows, then the marking phase should restart and iterate 3864 over the bitmap to identify gray objects. Throughout the marking 3865 phase, tasks attempt to keep the global mark stack at a small 3866 length but not totally empty, so that entries are available for 3867 popping by other tasks. Only when there is no more work will tasks 3868 totally drain the global mark stack. 3869 3870 (4) SATB Buffer Queue. This is where completed SATB buffers are 3871 made available. Buffers are regularly removed from this queue 3872 and scanned for roots, so that the queue doesn't get too 3873 long. During remark, all completed buffers are processed, as 3874 well as the filled-in parts of any uncompleted buffers. 3875 3876 The do_marking_step() method tries to abort when the time target 3877 has been reached. There are a few other cases when the 3878 do_marking_step() method also aborts: 3879 3880 (1) When the marking phase has been aborted (after a Full GC). 3881 3882 (2) When a global overflow (on the global stack) has been 3883 triggered. Before the task aborts, it will actually sync up with 3884 the other tasks to ensure that all the marking data structures 3885 (local queues, stacks, fingers, etc.)
are re-initialized so that 3886 when do_marking_step() completes, the marking phase can 3887 immediately restart. 3888 3889 (3) When enough completed SATB buffers are available. The 3890 do_marking_step() method only tries to drain SATB buffers right 3891 at the beginning. So, if enough buffers are available, the 3892 marking step aborts and the SATB buffers are processed at 3893 the beginning of the next invocation. 3894 3895 (4) To yield. When we have to yield, we abort and yield 3896 right at the end of do_marking_step(). This saves us from a lot 3897 of hassle as, by yielding, we might allow a Full GC. If this 3898 happens then objects will be compacted underneath our feet, the 3899 heap might shrink, etc. We save checking for this by just 3900 aborting and doing the yield right at the end. 3901 3902 From the above it follows that the do_marking_step() method should 3903 be called in a loop (or, otherwise, regularly) until it completes. 3904 3905 If a marking step completes without its has_aborted() flag being 3906 true, it means it has completed the current marking phase (and 3907 also all other marking tasks have done so and have all synced up). 3908 3909 A method called regular_clock_call() is invoked "regularly" (at 3910 sub-ms intervals) throughout marking. It is this clock method that 3911 checks all the abort conditions which were mentioned above and 3912 decides when the task should abort. A work-based scheme is used to 3913 trigger this clock method: when the number of object words the 3914 marking phase has scanned or the number of references the marking 3915 phase has visited reaches a given limit. Additional invocations of 3916 the clock method have been planted in a few other strategic places 3917 too. The initial reason for the clock method was to avoid calling 3918 vtime too regularly, as it is quite expensive. So, once it was in 3919 place, it was natural to piggy-back all the other conditions on it 3920 too and not constantly check them throughout the code. 3921 3922 If do_termination is true then do_marking_step will enter its 3923 termination protocol. 3924 3925 The value of is_serial must be true when do_marking_step is being 3926 called serially (i.e. by the VMThread) and do_marking_step should 3927 skip any synchronization in the termination and overflow code. 3928 Examples include the serial remark code and the serial reference 3929 processing closures. 3930 3931 The value of is_serial must be false when do_marking_step is 3932 being called by any of the worker threads in a work gang. 3933 Examples include the concurrent marking code (CMMarkingTask), 3934 the MT remark code, and the MT reference processing closures. 3935 3936 *****************************************************************************/ 3937 3938 void CMTask::do_marking_step(double time_target_ms, 3939 bool do_termination, 3940 bool is_serial) { 3941 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 3942 assert(concurrent() == _cm->concurrent(), "they should be the same"); 3943 3944 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 3945 assert(_task_queues != NULL, "invariant"); 3946 assert(_task_queue != NULL, "invariant"); 3947 assert(_task_queues->queue(_task_id) == _task_queue, "invariant"); 3948 3949 assert(!_claimed, 3950 "only one thread should claim this task at any one time"); 3951 3952 // OK, this doesn't safeguard against all possible scenarios, as it is 3953 // possible for two threads to set the _claimed flag at the same 3954 // time.
But it is only for debugging purposes anyway and it will 3955 // catch most problems. 3956 _claimed = true; 3957 3958 _start_time_ms = os::elapsedVTime() * 1000.0; 3959 statsOnly( _interval_start_time_ms = _start_time_ms ); 3960 3961 // If do_stealing is true then do_marking_step will attempt to 3962 // steal work from the other CMTasks. It only makes sense to 3963 // enable stealing when the termination protocol is enabled 3964 // and do_marking_step() is not being called serially. 3965 bool do_stealing = do_termination && !is_serial; 3966 3967 double diff_prediction_ms = 3968 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 3969 _time_target_ms = time_target_ms - diff_prediction_ms; 3970 3971 // set up the variables that are used in the work-based scheme to 3972 // call the regular clock method 3973 _words_scanned = 0; 3974 _refs_reached = 0; 3975 recalculate_limits(); 3976 3977 // clear all flags 3978 clear_has_aborted(); 3979 _has_timed_out = false; 3980 _draining_satb_buffers = false; 3981 3982 ++_calls; 3983 3984 if (_cm->verbose_low()) { 3985 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 3986 "target = %1.2lfms >>>>>>>>>>", 3987 _task_id, _calls, _time_target_ms); 3988 } 3989 3990 // Set up the bitmap and oop closures. Anything that uses them is 3991 // eventually called from this method, so it is OK to allocate these 3992 // statically. 3993 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 3994 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 3995 set_cm_oop_closure(&cm_oop_closure); 3996 3997 if (_cm->has_overflown()) { 3998 // This can happen if the mark stack overflows during a GC pause 3999 // and this task, after a yield point, restarts. We have to abort 4000 // as we need to get into the overflow protocol which happens 4001 // right at the end of this task. 4002 set_has_aborted(); 4003 } 4004 4005 // First drain any available SATB buffers. After this, we will not 4006 // look at SATB buffers before the next invocation of this method. 4007 // If enough completed SATB buffers are queued up, the regular clock 4008 // will abort this task so that it restarts. 4009 drain_satb_buffers(); 4010 // ...then partially drain the local queue and the global stack 4011 drain_local_queue(true); 4012 drain_global_stack(true); 4013 4014 do { 4015 if (!has_aborted() && _curr_region != NULL) { 4016 // This means that we're already holding on to a region. 4017 assert(_finger != NULL, "if region is not NULL, then the finger " 4018 "should not be NULL either"); 4019 4020 // We might have restarted this task after an evacuation pause 4021 // which might have evacuated the region we're holding on to 4022 // underneath our feet. Let's read its limit again to make sure 4023 // that we do not iterate over a region of the heap that 4024 // contains garbage (update_region_limit() will also move 4025 // _finger to the start of the region if it is found empty). 4026 update_region_limit(); 4027 // We will start from _finger not from the start of the region, 4028 // as we might be restarting this task after aborting half-way 4029 // through scanning this region. In this case, _finger points to 4030 // the address where we last found a marked object. If this is a 4031 // fresh region, _finger points to start(). 
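// For example, for a fresh region the MemRegion constructed just below
// spans [bottom(), next_top_at_mark_start()), following setup_for_region()
// and update_region_limit() above.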
4032 MemRegion mr = MemRegion(_finger, _region_limit); 4033 4034 if (_cm->verbose_low()) { 4035 gclog_or_tty->print_cr("[%d] we're scanning part " 4036 "["PTR_FORMAT", "PTR_FORMAT") " 4037 "of region "PTR_FORMAT, 4038 _task_id, _finger, _region_limit, _curr_region); 4039 } 4040 4041 // Let's iterate over the bitmap of the part of the 4042 // region that is left. 4043 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4044 // We successfully completed iterating over the region. Now, 4045 // let's give up the region. 4046 giveup_current_region(); 4047 regular_clock_call(); 4048 } else { 4049 assert(has_aborted(), "currently the only way to do so"); 4050 // The only way to abort the bitmap iteration is to return 4051 // false from the do_bit() method. However, inside the 4052 // do_bit() method we move the _finger to point to the 4053 // object currently being looked at. So, if we bail out, we 4054 // have definitely set _finger to something non-null. 4055 assert(_finger != NULL, "invariant"); 4056 4057 // Region iteration was actually aborted. So now _finger 4058 // points to the address of the object we last scanned. If we 4059 // leave it there, when we restart this task, we will rescan 4060 // the object. It is easy to avoid this. We move the finger by 4061 // enough to point to the next possible object header (the 4062 // bitmap knows by how much we need to move it as it knows its 4063 // granularity). 4064 assert(_finger < _region_limit, "invariant"); 4065 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4066 // Check if bitmap iteration was aborted while scanning the last object 4067 if (new_finger >= _region_limit) { 4068 giveup_current_region(); 4069 } else { 4070 move_finger_to(new_finger); 4071 } 4072 } 4073 } 4074 // At this point we have either completed iterating over the 4075 // region we were holding on to, or we have aborted. 4076 4077 // We then partially drain the local queue and the global stack. 4078 // (Do we really need this?) 4079 drain_local_queue(true); 4080 drain_global_stack(true); 4081 4082 // Read the note on the claim_region() method on why it might 4083 // return NULL with potentially more regions available for 4084 // claiming and why we have to check out_of_regions() to determine 4085 // whether we're done or not. 4086 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4087 // We are going to try to claim a new region. We should have 4088 // given up on the previous one. 4089 // Separated the asserts so that we know which one fires. 4090 assert(_curr_region == NULL, "invariant"); 4091 assert(_finger == NULL, "invariant"); 4092 assert(_region_limit == NULL, "invariant"); 4093 if (_cm->verbose_low()) { 4094 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 4095 } 4096 HeapRegion* claimed_region = _cm->claim_region(_task_id); 4097 if (claimed_region != NULL) { 4098 // Yes, we managed to claim one 4099 statsOnly( ++_regions_claimed ); 4100 4101 if (_cm->verbose_low()) { 4102 gclog_or_tty->print_cr("[%d] we successfully claimed " 4103 "region "PTR_FORMAT, 4104 _task_id, claimed_region); 4105 } 4106 4107 setup_for_region(claimed_region); 4108 assert(_curr_region == claimed_region, "invariant"); 4109 } 4110 // It is important to call the regular clock here. It might take 4111 // a while to claim a region if, for example, we hit a large 4112 // block of empty regions. So we need to call the regular clock 4113 // method once round the loop to make sure it's called 4114 // frequently enough. 
4115 regular_clock_call(); 4116 } 4117 4118 if (!has_aborted() && _curr_region == NULL) { 4119 assert(_cm->out_of_regions(), 4120 "at this point we should be out of regions"); 4121 } 4122 } while ( _curr_region != NULL && !has_aborted()); 4123 4124 if (!has_aborted()) { 4125 // We cannot check whether the global stack is empty, since other 4126 // tasks might be pushing objects to it concurrently. 4127 assert(_cm->out_of_regions(), 4128 "at this point we should be out of regions"); 4129 4130 if (_cm->verbose_low()) { 4131 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); 4132 } 4133 4134 // Try to reduce the number of available SATB buffers so that 4135 // remark has less work to do. 4136 drain_satb_buffers(); 4137 } 4138 4139 // Since we've done everything else, we can now totally drain the 4140 // local queue and global stack. 4141 drain_local_queue(false); 4142 drain_global_stack(false); 4143 4144 // Attempt at work stealing from other task's queues. 4145 if (do_stealing && !has_aborted()) { 4146 // We have not aborted. This means that we have finished all that 4147 // we could. Let's try to do some stealing... 4148 4149 // We cannot check whether the global stack is empty, since other 4150 // tasks might be pushing objects to it concurrently. 4151 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4152 "only way to reach here"); 4153 4154 if (_cm->verbose_low()) { 4155 gclog_or_tty->print_cr("[%d] starting to steal", _task_id); 4156 } 4157 4158 while (!has_aborted()) { 4159 oop obj; 4160 statsOnly( ++_steal_attempts ); 4161 4162 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { 4163 if (_cm->verbose_medium()) { 4164 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", 4165 _task_id, (void*) obj); 4166 } 4167 4168 statsOnly( ++_steals ); 4169 4170 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4171 "any stolen object should be marked"); 4172 scan_object(obj); 4173 4174 // And since we're towards the end, let's totally drain the 4175 // local queue and global stack. 4176 drain_local_queue(false); 4177 drain_global_stack(false); 4178 } else { 4179 break; 4180 } 4181 } 4182 } 4183 4184 // If we are about to wrap up and go into termination, check if we 4185 // should raise the overflow flag. 4186 if (do_termination && !has_aborted()) { 4187 if (_cm->force_overflow()->should_force()) { 4188 _cm->set_has_overflown(); 4189 regular_clock_call(); 4190 } 4191 } 4192 4193 // We still haven't aborted. Now, let's try to get into the 4194 // termination protocol. 4195 if (do_termination && !has_aborted()) { 4196 // We cannot check whether the global stack is empty, since other 4197 // tasks might be concurrently pushing objects on it. 4198 // Separated the asserts so that we know which one fires. 4199 assert(_cm->out_of_regions(), "only way to reach here"); 4200 assert(_task_queue->size() == 0, "only way to reach here"); 4201 4202 if (_cm->verbose_low()) { 4203 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); 4204 } 4205 4206 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4207 4208 // The CMTask class also extends the TerminatorTerminator class, 4209 // hence its should_exit_termination() method will also decide 4210 // whether to exit the termination protocol or not. 
4211 bool finished = (is_serial || 4212 _cm->terminator()->offer_termination(this)); 4213 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4214 _termination_time_ms += 4215 termination_end_time_ms - _termination_start_time_ms; 4216 4217 if (finished) { 4218 // We're all done. 4219 4220 if (_task_id == 0) { 4221 // let's allow task 0 to do this 4222 if (concurrent()) { 4223 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4224 // we need to set this to false before the next 4225 // safepoint. This way we ensure that the marking phase 4226 // doesn't observe any more heap expansions. 4227 _cm->clear_concurrent_marking_in_progress(); 4228 } 4229 } 4230 4231 // We can now guarantee that the global stack is empty, since 4232 // all other tasks have finished. We separated the guarantees so 4233 // that, if a condition is false, we can immediately find out 4234 // which one. 4235 guarantee(_cm->out_of_regions(), "only way to reach here"); 4236 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4237 guarantee(_task_queue->size() == 0, "only way to reach here"); 4238 guarantee(!_cm->has_overflown(), "only way to reach here"); 4239 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4240 4241 if (_cm->verbose_low()) { 4242 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4243 } 4244 } else { 4245 // Apparently there's more work to do. Let's abort this task. It 4246 // will restart it and we can hopefully find more things to do. 4247 4248 if (_cm->verbose_low()) { 4249 gclog_or_tty->print_cr("[%d] apparently there is more work to do", 4250 _task_id); 4251 } 4252 4253 set_has_aborted(); 4254 statsOnly( ++_aborted_termination ); 4255 } 4256 } 4257 4258 // Mainly for debugging purposes to make sure that a pointer to the 4259 // closure which was statically allocated in this frame doesn't 4260 // escape it by accident. 4261 set_cm_oop_closure(NULL); 4262 double end_time_ms = os::elapsedVTime() * 1000.0; 4263 double elapsed_time_ms = end_time_ms - _start_time_ms; 4264 // Update the step history. 4265 _step_times_ms.add(elapsed_time_ms); 4266 4267 if (has_aborted()) { 4268 // The task was aborted for some reason. 4269 4270 statsOnly( ++_aborted ); 4271 4272 if (_has_timed_out) { 4273 double diff_ms = elapsed_time_ms - _time_target_ms; 4274 // Keep statistics of how well we did with respect to hitting 4275 // our target only if we actually timed out (if we aborted for 4276 // other reasons, then the results might get skewed). 4277 _marking_step_diffs_ms.add(diff_ms); 4278 } 4279 4280 if (_cm->has_overflown()) { 4281 // This is the interesting one. We aborted because a global 4282 // overflow was raised. This means we have to restart the 4283 // marking phase and start iterating over regions. However, in 4284 // order to do this we have to make sure that all tasks stop 4285 // what they are doing and re-initialise in a safe manner. We 4286 // will achieve this with the use of two barrier sync points. 4287 4288 if (_cm->verbose_low()) { 4289 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4290 } 4291 4292 if (!is_serial) { 4293 // We only need to enter the sync barrier if being called 4294 // from a parallel context 4295 _cm->enter_first_sync_barrier(_task_id); 4296 4297 // When we exit this sync barrier we know that all tasks have 4298 // stopped doing marking work. So, it's now safe to 4299 // re-initialise our data structures. At the end of this method, 4300 // task 0 will clear the global data structures. 
4301 } 4302 4303 statsOnly( ++_aborted_overflow ); 4304 4305 // We clear the local state of this task... 4306 clear_region_fields(); 4307 4308 if (!is_serial) { 4309 // ...and enter the second barrier. 4310 _cm->enter_second_sync_barrier(_task_id); 4311 } 4312 // At this point, if we're during the concurrent phase of 4313 // marking, everything has been re-initialized and we're 4314 // ready to restart. 4315 } 4316 4317 if (_cm->verbose_low()) { 4318 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4319 "elapsed = %1.2lfms <<<<<<<<<<", 4320 _task_id, _time_target_ms, elapsed_time_ms); 4321 if (_cm->has_aborted()) { 4322 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", 4323 _task_id); 4324 } 4325 } 4326 } else { 4327 if (_cm->verbose_low()) { 4328 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4329 "elapsed = %1.2lfms <<<<<<<<<<", 4330 _task_id, _time_target_ms, elapsed_time_ms); 4331 } 4332 } 4333 4334 _claimed = false; 4335 } 4336 4337 CMTask::CMTask(int task_id, 4338 ConcurrentMark* cm, 4339 size_t* marked_bytes, 4340 BitMap* card_bm, 4341 CMTaskQueue* task_queue, 4342 CMTaskQueueSet* task_queues) 4343 : _g1h(G1CollectedHeap::heap()), 4344 _task_id(task_id), _cm(cm), 4345 _claimed(false), 4346 _nextMarkBitMap(NULL), _hash_seed(17), 4347 _task_queue(task_queue), 4348 _task_queues(task_queues), 4349 _cm_oop_closure(NULL), 4350 _marked_bytes_array(marked_bytes), 4351 _card_bm(card_bm) { 4352 guarantee(task_queue != NULL, "invariant"); 4353 guarantee(task_queues != NULL, "invariant"); 4354 4355 statsOnly( _clock_due_to_scanning = 0; 4356 _clock_due_to_marking = 0 ); 4357 4358 _marking_step_diffs_ms.add(0.5); 4359 } 4360 4361 // These are formatting macros that are used below to ensure 4362 // consistent formatting. The *_H_* versions are used to format the 4363 // header for a particular value and they should be kept consistent 4364 // with the corresponding macro. Also note that most of the macros add 4365 // the necessary white space (as a prefix) which makes them a bit 4366 // easier to compose. 4367 4368 // All the output lines are prefixed with this string to be able to 4369 // identify them easily in a large log file. 
4370 #define G1PPRL_LINE_PREFIX "###" 4371 4372 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4373 #ifdef _LP64 4374 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4375 #else // _LP64 4376 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4377 #endif // _LP64 4378 4379 // For per-region info 4380 #define G1PPRL_TYPE_FORMAT " %-4s" 4381 #define G1PPRL_TYPE_H_FORMAT " %4s" 4382 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4383 #define G1PPRL_BYTE_H_FORMAT " %9s" 4384 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4385 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4386 4387 // For summary info 4388 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4389 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4390 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4391 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4392 4393 G1PrintRegionLivenessInfoClosure:: 4394 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4395 : _out(out), 4396 _total_used_bytes(0), _total_capacity_bytes(0), 4397 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4398 _hum_used_bytes(0), _hum_capacity_bytes(0), 4399 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) { 4400 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4401 MemRegion g1_committed = g1h->g1_committed(); 4402 MemRegion g1_reserved = g1h->g1_reserved(); 4403 double now = os::elapsedTime(); 4404 4405 // Print the header of the output. 4406 _out->cr(); 4407 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 4408 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 4409 G1PPRL_SUM_ADDR_FORMAT("committed") 4410 G1PPRL_SUM_ADDR_FORMAT("reserved") 4411 G1PPRL_SUM_BYTE_FORMAT("region-size"), 4412 g1_committed.start(), g1_committed.end(), 4413 g1_reserved.start(), g1_reserved.end(), 4414 HeapRegion::GrainBytes); 4415 _out->print_cr(G1PPRL_LINE_PREFIX); 4416 _out->print_cr(G1PPRL_LINE_PREFIX 4417 G1PPRL_TYPE_H_FORMAT 4418 G1PPRL_ADDR_BASE_H_FORMAT 4419 G1PPRL_BYTE_H_FORMAT 4420 G1PPRL_BYTE_H_FORMAT 4421 G1PPRL_BYTE_H_FORMAT 4422 G1PPRL_DOUBLE_H_FORMAT, 4423 "type", "address-range", 4424 "used", "prev-live", "next-live", "gc-eff"); 4425 _out->print_cr(G1PPRL_LINE_PREFIX 4426 G1PPRL_TYPE_H_FORMAT 4427 G1PPRL_ADDR_BASE_H_FORMAT 4428 G1PPRL_BYTE_H_FORMAT 4429 G1PPRL_BYTE_H_FORMAT 4430 G1PPRL_BYTE_H_FORMAT 4431 G1PPRL_DOUBLE_H_FORMAT, 4432 "", "", 4433 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)"); 4434 } 4435 4436 // It takes as a parameter a reference to one of the _hum_* fields, 4437 // deduces the corresponding value for a region in a humongous region 4438 // series (either the region size, or what's left if the _hum_* field 4439 // is < the region size), and updates the _hum_* field accordingly. 4440 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 4441 size_t bytes = 0; 4442 // The > 0 check is to deal with the prev and next live bytes which 4443 // could be 0. 4444 if (*hum_bytes > 0) { 4445 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes); 4446 *hum_bytes -= bytes; 4447 } 4448 return bytes; 4449 } 4450 4451 // It deduces the values for a region in a humongous region series 4452 // from the _hum_* fields and updates those accordingly. It assumes 4453 // that the _hum_* fields have already been set up from the "starts 4454 // humongous" region and that we visit the regions in address order.
4455 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4456 size_t* capacity_bytes, 4457 size_t* prev_live_bytes, 4458 size_t* next_live_bytes) { 4459 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4460 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4461 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4462 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4463 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4464 } 4465 4466 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4467 const char* type = ""; 4468 HeapWord* bottom = r->bottom(); 4469 HeapWord* end = r->end(); 4470 size_t capacity_bytes = r->capacity(); 4471 size_t used_bytes = r->used(); 4472 size_t prev_live_bytes = r->live_bytes(); 4473 size_t next_live_bytes = r->next_live_bytes(); 4474 double gc_eff = r->gc_efficiency(); 4475 if (r->used() == 0) { 4476 type = "FREE"; 4477 } else if (r->is_survivor()) { 4478 type = "SURV"; 4479 } else if (r->is_young()) { 4480 type = "EDEN"; 4481 } else if (r->startsHumongous()) { 4482 type = "HUMS"; 4483 4484 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4485 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4486 "they should have been zeroed after the last time we used them"); 4487 // Set up the _hum_* fields. 4488 _hum_capacity_bytes = capacity_bytes; 4489 _hum_used_bytes = used_bytes; 4490 _hum_prev_live_bytes = prev_live_bytes; 4491 _hum_next_live_bytes = next_live_bytes; 4492 get_hum_bytes(&used_bytes, &capacity_bytes, 4493 &prev_live_bytes, &next_live_bytes); 4494 end = bottom + HeapRegion::GrainWords; 4495 } else if (r->continuesHumongous()) { 4496 type = "HUMC"; 4497 get_hum_bytes(&used_bytes, &capacity_bytes, 4498 &prev_live_bytes, &next_live_bytes); 4499 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4500 } else { 4501 type = "OLD"; 4502 } 4503 4504 _total_used_bytes += used_bytes; 4505 _total_capacity_bytes += capacity_bytes; 4506 _total_prev_live_bytes += prev_live_bytes; 4507 _total_next_live_bytes += next_live_bytes; 4508 4509 // Print a line for this particular region. 4510 _out->print_cr(G1PPRL_LINE_PREFIX 4511 G1PPRL_TYPE_FORMAT 4512 G1PPRL_ADDR_BASE_FORMAT 4513 G1PPRL_BYTE_FORMAT 4514 G1PPRL_BYTE_FORMAT 4515 G1PPRL_BYTE_FORMAT 4516 G1PPRL_DOUBLE_FORMAT, 4517 type, bottom, end, 4518 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4519 4520 return false; 4521 } 4522 4523 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4524 // Print the footer of the output. 4525 _out->print_cr(G1PPRL_LINE_PREFIX); 4526 _out->print_cr(G1PPRL_LINE_PREFIX 4527 " SUMMARY" 4528 G1PPRL_SUM_MB_FORMAT("capacity") 4529 G1PPRL_SUM_MB_PERC_FORMAT("used") 4530 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4531 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4532 bytes_to_mb(_total_capacity_bytes), 4533 bytes_to_mb(_total_used_bytes), 4534 perc(_total_used_bytes, _total_capacity_bytes), 4535 bytes_to_mb(_total_prev_live_bytes), 4536 perc(_total_prev_live_bytes, _total_capacity_bytes), 4537 bytes_to_mb(_total_next_live_bytes), 4538 perc(_total_next_live_bytes, _total_capacity_bytes)); 4539 _out->cr(); 4540 }
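// A minimal usage sketch for the closure above; the call site and the
// phase name are illustrative only, not a call that exists at this
// point in the file:
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the table header, doHeapRegion() prints one
// line per region (returning false so that iteration continues), and
// the destructor prints the summary footer.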