/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
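  // Sizing note: each bit of the map covers (1 << _shifter) heap words, so
  // the backing store needs (_bmWordSize >> (_shifter + LogBitsPerByte))
  // bytes; the extra byte requested above covers rounding.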
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

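// CMMarkStack is the global mark stack shared by all marking tasks.
// Concurrent pushes are coordinated either lock-free, via CAS on _index
// (par_push / par_adjoin_arr), or under ParGCRareEvent_lock
// (par_push_arr / par_pop_arr). When the stack is exhausted the _overflow
// flag is raised rather than growing the stack.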
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

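// The two operations below serialize on ParGCRareEvent_lock instead of
// using CAS; as the lock name suggests, they are only expected on rare
// paths, where taking a lock is simpler than a retry loop.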
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

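// claim_next() hands out one root region per call to competing workers.
// It reads _next_survivor optimistically and then re-reads it under
// RootRegionScan_lock (a double-checked pattern), so a losing thread
// simply observes NULL or the next unclaimed region.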
HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, log2_intptr(MinObjAlignment)),
  _markBitMap2(rs, log2_intptr(MinObjAlignment)),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
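  // (Presumably card_bitmap_index_for() subtracts this value, so that a
  // heap address maps to bit (addr >> card_shift) - _heap_bottom_card_num
  // in the per-task card bitmaps.)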
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads     = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor                 = 0.0;
    _marking_task_overhead        = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor          = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_task_num; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause could occur while
 * it is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let task 0 do this
    if (task_num == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

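    // Each iteration below performs one time-bounded marking step; if the
    // step aborted because its time slice ran out, the worker sleeps
    // (outside the STS) in proportion to the configured sleep factor
    // before retrying.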
    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

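// Scans a single root region from bottom() to top(), applying the root
// region closure to every object and prefetching ahead of the scan cursor.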
void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(overflow)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UsePrevMarking);
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
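// The counting data is kept in two bitmaps: a region bitmap with one bit
// per heap region and a card bitmap with one bit per card (see _region_bm
// and _card_bm in the ConcurrentMark constructor).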
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.
    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
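    // (Objects allocated at or above NTAMS are the implicitly live ones
    // mentioned in the class comment; the whole [ntams, top) interval is
    // counted without scanning individual objects.)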
1601 if (ntams < top) { 1602 // This definitely means the region has live objects. 1603 set_bit_for_region(hr); 1604 1605 // Now set the bits in the card bitmap for [ntams, top) 1606 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1607 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1608 1609 // Note: if we're looking at the last region in heap - top 1610 // could be actually just beyond the end of the heap; end_idx 1611 // will then correspond to a (non-existent) card that is also 1612 // just beyond the heap. 1613 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1614 // end of object is not card aligned - increment to cover 1615 // all the cards spanned by the object 1616 end_idx += 1; 1617 } 1618 1619 assert(end_idx <= _card_bm->size(), 1620 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1621 end_idx, _card_bm->size())); 1622 assert(start_idx < _card_bm->size(), 1623 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1624 start_idx, _card_bm->size())); 1625 1626 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1627 } 1628 1629 // Set the bit for the region if it contains live data 1630 if (hr->next_marked_bytes() > 0) { 1631 set_bit_for_region(hr); 1632 } 1633 1634 return false; 1635 } 1636 }; 1637 1638 class G1ParFinalCountTask: public AbstractGangTask { 1639 protected: 1640 G1CollectedHeap* _g1h; 1641 ConcurrentMark* _cm; 1642 BitMap* _actual_region_bm; 1643 BitMap* _actual_card_bm; 1644 1645 uint _n_workers; 1646 1647 public: 1648 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1649 : AbstractGangTask("G1 final counting"), 1650 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1651 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1652 _n_workers(0) { 1653 // Use the value already set as the number of active threads 1654 // in the call to run_task(). 
1655 if (G1CollectedHeap::use_parallel_gc_threads()) { 1656 assert( _g1h->workers()->active_workers() > 0, 1657 "Should have been previously set"); 1658 _n_workers = _g1h->workers()->active_workers(); 1659 } else { 1660 _n_workers = 1; 1661 } 1662 } 1663 1664 void work(uint worker_id) { 1665 assert(worker_id < _n_workers, "invariant"); 1666 1667 FinalCountDataUpdateClosure final_update_cl(_g1h, 1668 _actual_region_bm, 1669 _actual_card_bm); 1670 1671 if (G1CollectedHeap::use_parallel_gc_threads()) { 1672 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1673 worker_id, 1674 _n_workers, 1675 HeapRegion::FinalCountClaimValue); 1676 } else { 1677 _g1h->heap_region_iterate(&final_update_cl); 1678 } 1679 } 1680 }; 1681 1682 class G1ParNoteEndTask; 1683 1684 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1685 G1CollectedHeap* _g1; 1686 int _worker_num; 1687 size_t _max_live_bytes; 1688 uint _regions_claimed; 1689 size_t _freed_bytes; 1690 FreeRegionList* _local_cleanup_list; 1691 OldRegionSet* _old_proxy_set; 1692 HumongousRegionSet* _humongous_proxy_set; 1693 HRRSCleanupTask* _hrrs_cleanup_task; 1694 double _claimed_region_time; 1695 double _max_region_time; 1696 1697 public: 1698 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1699 int worker_num, 1700 FreeRegionList* local_cleanup_list, 1701 OldRegionSet* old_proxy_set, 1702 HumongousRegionSet* humongous_proxy_set, 1703 HRRSCleanupTask* hrrs_cleanup_task) : 1704 _g1(g1), _worker_num(worker_num), 1705 _max_live_bytes(0), _regions_claimed(0), 1706 _freed_bytes(0), 1707 _claimed_region_time(0.0), _max_region_time(0.0), 1708 _local_cleanup_list(local_cleanup_list), 1709 _old_proxy_set(old_proxy_set), 1710 _humongous_proxy_set(humongous_proxy_set), 1711 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1712 1713 size_t freed_bytes() { return _freed_bytes; } 1714 1715 bool doHeapRegion(HeapRegion *hr) { 1716 if (hr->continuesHumongous()) { 1717 return false; 1718 } 1719 // We use a claim value of zero here because all regions 1720 // were claimed with value 1 in the FinalCount task. 
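    // [Editorial note.] free_region_if_empty() below does not touch the
    // global free list directly: a region found to be empty goes onto
    // this worker's _local_cleanup_list and its size is added to
    // _freed_bytes. The per-worker lists are only merged into the global
    // cleanup list later, under ParGCRareEvent_lock, in
    // G1ParNoteEndTask::work().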
1721 _g1->reset_gc_time_stamps(hr); 1722 double start = os::elapsedTime(); 1723 _regions_claimed++; 1724 hr->note_end_of_marking(); 1725 _max_live_bytes += hr->max_live_bytes(); 1726 _g1->free_region_if_empty(hr, 1727 &_freed_bytes, 1728 _local_cleanup_list, 1729 _old_proxy_set, 1730 _humongous_proxy_set, 1731 _hrrs_cleanup_task, 1732 true /* par */); 1733 double region_time = (os::elapsedTime() - start); 1734 _claimed_region_time += region_time; 1735 if (region_time > _max_region_time) { 1736 _max_region_time = region_time; 1737 } 1738 return false; 1739 } 1740 1741 size_t max_live_bytes() { return _max_live_bytes; } 1742 uint regions_claimed() { return _regions_claimed; } 1743 double claimed_region_time_sec() { return _claimed_region_time; } 1744 double max_region_time_sec() { return _max_region_time; } 1745 }; 1746 1747 class G1ParNoteEndTask: public AbstractGangTask { 1748 friend class G1NoteEndOfConcMarkClosure; 1749 1750 protected: 1751 G1CollectedHeap* _g1h; 1752 size_t _max_live_bytes; 1753 size_t _freed_bytes; 1754 FreeRegionList* _cleanup_list; 1755 1756 public: 1757 G1ParNoteEndTask(G1CollectedHeap* g1h, 1758 FreeRegionList* cleanup_list) : 1759 AbstractGangTask("G1 note end"), _g1h(g1h), 1760 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1761 1762 void work(uint worker_id) { 1763 double start = os::elapsedTime(); 1764 FreeRegionList local_cleanup_list("Local Cleanup List"); 1765 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1766 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1767 HRRSCleanupTask hrrs_cleanup_task; 1768 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1769 &old_proxy_set, 1770 &humongous_proxy_set, 1771 &hrrs_cleanup_task); 1772 if (G1CollectedHeap::use_parallel_gc_threads()) { 1773 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1774 _g1h->workers()->active_workers(), 1775 HeapRegion::NoteEndClaimValue); 1776 } else { 1777 _g1h->heap_region_iterate(&g1_note_end); 1778 } 1779 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1780 1781 // Now update the lists 1782 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1783 NULL /* free_list */, 1784 &old_proxy_set, 1785 &humongous_proxy_set, 1786 true /* par */); 1787 { 1788 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1789 _max_live_bytes += g1_note_end.max_live_bytes(); 1790 _freed_bytes += g1_note_end.freed_bytes(); 1791 1792 // If we iterate over the global cleanup list at the end of 1793 // cleanup to do this printing we will not guarantee to only 1794 // generate output for the newly-reclaimed regions (the list 1795 // might not be empty at the beginning of cleanup; we might 1796 // still be working on its previous contents). So we do the 1797 // printing here, before we append the new regions to the global 1798 // cleanup list. 
1799 1800 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1801 if (hr_printer->is_active()) { 1802 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1803 while (iter.more_available()) { 1804 HeapRegion* hr = iter.get_next(); 1805 hr_printer->cleanup(hr); 1806 } 1807 } 1808 1809 _cleanup_list->add_as_tail(&local_cleanup_list); 1810 assert(local_cleanup_list.is_empty(), "post-condition"); 1811 1812 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1813 } 1814 } 1815 size_t max_live_bytes() { return _max_live_bytes; } 1816 size_t freed_bytes() { return _freed_bytes; } 1817 }; 1818 1819 class G1ParScrubRemSetTask: public AbstractGangTask { 1820 protected: 1821 G1RemSet* _g1rs; 1822 BitMap* _region_bm; 1823 BitMap* _card_bm; 1824 public: 1825 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1826 BitMap* region_bm, BitMap* card_bm) : 1827 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1828 _region_bm(region_bm), _card_bm(card_bm) { } 1829 1830 void work(uint worker_id) { 1831 if (G1CollectedHeap::use_parallel_gc_threads()) { 1832 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1833 HeapRegion::ScrubRemSetClaimValue); 1834 } else { 1835 _g1rs->scrub(_region_bm, _card_bm); 1836 } 1837 } 1838 1839 }; 1840 1841 void ConcurrentMark::cleanup() { 1842 // world is stopped at this checkpoint 1843 assert(SafepointSynchronize::is_at_safepoint(), 1844 "world should be stopped"); 1845 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1846 1847 // If a full collection has happened, we shouldn't do this. 1848 if (has_aborted()) { 1849 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1850 return; 1851 } 1852 1853 HRSPhaseSetter x(HRSPhaseCleanup); 1854 g1h->verify_region_sets_optional(); 1855 1856 if (VerifyDuringGC) { 1857 HandleMark hm; // handle scope 1858 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1859 Universe::heap()->prepare_for_verify(); 1860 Universe::verify(/* silent */ false, 1861 /* option */ VerifyOption_G1UsePrevMarking); 1862 } 1863 1864 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1865 g1p->record_concurrent_mark_cleanup_start(); 1866 1867 double start = os::elapsedTime(); 1868 1869 HeapRegionRemSet::reset_for_cleanup_tasks(); 1870 1871 uint n_workers; 1872 1873 // Do counting once more with the world stopped for good measure. 1874 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1875 1876 if (G1CollectedHeap::use_parallel_gc_threads()) { 1877 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1878 "sanity check"); 1879 1880 g1h->set_par_threads(); 1881 n_workers = g1h->n_par_threads(); 1882 assert(g1h->n_par_threads() == n_workers, 1883 "Should not have been reset"); 1884 g1h->workers()->run_task(&g1_par_count_task); 1885 // Done with the parallel phase so reset to 0. 1886 g1h->set_par_threads(0); 1887 1888 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1889 "sanity check"); 1890 } else { 1891 n_workers = 1; 1892 g1_par_count_task.work(0); 1893 } 1894 1895 if (VerifyDuringGC) { 1896 // Verify that the counting data accumulated during marking matches 1897 // that calculated by walking the marking bitmap. 
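  // [Editorial note.] The verification below is a recompute-and-compare
  // check: a pair of fresh, zero-filled bitmaps is populated by
  // re-walking the marking bitmap (via VerifyLiveObjectDataHRClosure)
  // and then compared, bit for bit and byte count for byte count,
  // against the data accumulated during marking. Each discrepancy bumps
  // a failure count, and the guarantee() after the task runs turns a
  // non-zero count into a VM error.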
1898
1899     // Bitmaps to hold expected values
1900     BitMap expected_region_bm(_region_bm.size(), false);
1901     BitMap expected_card_bm(_card_bm.size(), false);
1902
1903     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1904                                                  &_region_bm,
1905                                                  &_card_bm,
1906                                                  &expected_region_bm,
1907                                                  &expected_card_bm);
1908
1909     if (G1CollectedHeap::use_parallel_gc_threads()) {
1910       g1h->set_par_threads((int)n_workers);
1911       g1h->workers()->run_task(&g1_par_verify_task);
1912       // Done with the parallel phase so reset to 0.
1913       g1h->set_par_threads(0);
1914
1915       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1916              "sanity check");
1917     } else {
1918       g1_par_verify_task.work(0);
1919     }
1920
1921     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1922   }
1923
1924   size_t start_used_bytes = g1h->used();
1925   g1h->set_marking_complete();
1926
1927   double count_end = os::elapsedTime();
1928   double this_final_counting_time = (count_end - start);
1929   _total_counting_time += this_final_counting_time;
1930
1931   if (G1PrintRegionLivenessInfo) {
1932     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1933     _g1h->heap_region_iterate(&cl);
1934   }
1935
1936   // Install the newly created mark bit map as "prev".
1937   swapMarkBitMaps();
1938
1939   g1h->reset_gc_time_stamp();
1940
1941   // Note end of marking in all heap regions.
1942   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1943   if (G1CollectedHeap::use_parallel_gc_threads()) {
1944     g1h->set_par_threads((int)n_workers);
1945     g1h->workers()->run_task(&g1_par_note_end_task);
1946     g1h->set_par_threads(0);
1947
1948     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1949            "sanity check");
1950   } else {
1951     g1_par_note_end_task.work(0);
1952   }
1953   g1h->check_gc_time_stamps();
1954
1955   if (!cleanup_list_is_empty()) {
1956     // The cleanup list is not empty, so we'll have to process it
1957     // concurrently. Notify anyone else that might be wanting free
1958     // regions that there will be more free regions coming soon.
1959     g1h->set_free_regions_coming();
1960   }
1961
1962   // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
1963   // call below, since it affects the metric by which we sort the heap regions.
1964   if (G1ScrubRemSets) {
1965     double rs_scrub_start = os::elapsedTime();
1966     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1967     if (G1CollectedHeap::use_parallel_gc_threads()) {
1968       g1h->set_par_threads((int)n_workers);
1969       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1970       g1h->set_par_threads(0);
1971
1972       assert(g1h->check_heap_region_claim_values(
1973                                             HeapRegion::ScrubRemSetClaimValue),
1974              "sanity check");
1975     } else {
1976       g1_par_scrub_rs_task.work(0);
1977     }
1978
1979     double rs_scrub_end = os::elapsedTime();
1980     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1981     _total_rs_scrub_time += this_rs_scrub_time;
1982   }
1983
1984   // This will also free any regions totally full of garbage objects,
1985   // and sort the regions.
1986   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1987
1988   // Statistics.
1989   double end = os::elapsedTime();
1990   _cleanup_times.add((end - start) * 1000.0);
1991
1992   if (G1Log::fine()) {
1993     g1h->print_size_transition(gclog_or_tty,
1994                                start_used_bytes,
1995                                g1h->used(),
1996                                g1h->capacity());
1997   }
1998
1999   // Cleanup will have freed any regions completely full of garbage.
2000   // Update the soft reference policy with the new heap occupancy.
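  // [Editorial note.] update_heap_info_at_gc() records the current heap
  // occupancy and capacity for use by the soft reference clearing policy
  // (e.g. LRUMaxHeapPolicy scales the permitted SoftReference age by the
  // amount of free heap), so it is worth refreshing right after cleanup
  // has handed empty regions back to the free lists.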
2001   Universe::update_heap_info_at_gc();
2002
2003   // We need to make this be a "collection" so any collection pause that
2004   // races with it goes around and waits for completeCleanup to finish.
2005   g1h->increment_total_collections();
2006
2007   // We reclaimed old regions so we should calculate the sizes to make
2008   // sure we update the old gen/space data.
2009   g1h->g1mm()->update_sizes();
2010
2011   if (VerifyDuringGC) {
2012     HandleMark hm;  // handle scope
2013     gclog_or_tty->print(" VerifyDuringGC:(after)");
2014     Universe::heap()->prepare_for_verify();
2015     Universe::verify(/* silent */ false,
2016                      /* option */ VerifyOption_G1UsePrevMarking);
2017   }
2018
2019   g1h->verify_region_sets_optional();
2020   g1h->trace_heap_after_concurrent_cycle();
2021 }
2022
2023 void ConcurrentMark::completeCleanup() {
2024   if (has_aborted()) return;
2025
2026   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2027
2028   _cleanup_list.verify_optional();
2029   FreeRegionList tmp_free_list("Tmp Free List");
2030
2031   if (G1ConcRegionFreeingVerbose) {
2032     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2033                            "cleanup list has %u entries",
2034                            _cleanup_list.length());
2035   }
2036
2037   // No one else should be accessing the _cleanup_list at this point,
2038   // so it's not necessary to take any locks
2039   while (!_cleanup_list.is_empty()) {
2040     HeapRegion* hr = _cleanup_list.remove_head();
2041     assert(hr != NULL, "the list was not empty");
2042     hr->par_clear();
2043     tmp_free_list.add_as_tail(hr);
2044
2045     // Instead of adding one region at a time to the secondary_free_list,
2046     // we accumulate them in the local list and move them a few at a
2047     // time. This also cuts down on the number of notify_all() calls
2048     // we do during this process. We'll also append the local list when
2049     // _cleanup_list is empty (which means we just removed the last
2050     // region from the _cleanup_list).
2051     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2052         _cleanup_list.is_empty()) {
2053       if (G1ConcRegionFreeingVerbose) {
2054         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2055                                "appending %u entries to the secondary_free_list, "
2056                                "cleanup list still has %u entries",
2057                                tmp_free_list.length(),
2058                                _cleanup_list.length());
2059       }
2060
2061       {
2062         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2063         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2064         SecondaryFreeList_lock->notify_all();
2065       }
2066
2067       if (G1StressConcRegionFreeing) {
2068         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2069           os::sleep(Thread::current(), (jlong) 1, false);
2070         }
2071       }
2072     }
2073   }
2074   assert(tmp_free_list.is_empty(), "post-condition");
2075 }
2076
2077 // Supporting Object and Oop closures for reference discovery
2078 // and processing during marking
2079
2080 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2081   HeapWord* addr = (HeapWord*)obj;
2082   return addr != NULL &&
2083          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2084 }
2085
2086 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2087 // Uses the CMTask associated with a worker thread (for serial reference
2088 // processing the CMTask for worker 0 is used) to preserve (mark) and
2089 // trace referent objects.
2090 //
2091 // Using the CMTask and embedded local queues avoids having the worker
2092 // threads operating on the global mark stack. This reduces the risk
2093 // of overflowing the stack - which we would rather avoid at this late
2094 // stage. Also, using the tasks' local queues removes the potential
2095 // for the workers to interfere with each other, which could occur if
2096 // they operated on the global stack.
2097
2098 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2099   ConcurrentMark* _cm;
2100   CMTask* _task;
2101   int _ref_counter_limit;
2102   int _ref_counter;
2103   bool _is_serial;
2104  public:
2105   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2106     _cm(cm), _task(task), _is_serial(is_serial),
2107     _ref_counter_limit(G1RefProcDrainInterval) {
2108     assert(_ref_counter_limit > 0, "sanity");
2109     assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
2110     _ref_counter = _ref_counter_limit;
2111   }
2112
2113   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2114   virtual void do_oop(      oop* p) { do_oop_work(p); }
2115
2116   template <class T> void do_oop_work(T* p) {
2117     if (!_cm->has_overflown()) {
2118       oop obj = oopDesc::load_decode_heap_oop(p);
2119       if (_cm->verbose_high()) {
2120         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2121                                "*"PTR_FORMAT" = "PTR_FORMAT,
2122                                _task->task_id(), p, (void*) obj);
2123       }
2124
2125       _task->deal_with_reference(obj);
2126       _ref_counter--;
2127
2128       if (_ref_counter == 0) {
2129         // We have dealt with _ref_counter_limit references, pushing them
2130         // and objects reachable from them on to the local stack (and
2131         // possibly the global stack). Call CMTask::do_marking_step() to
2132         // process these entries.
2133         //
2134         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2135         // there's nothing more to do (i.e. we're done with the entries that
2136         // were pushed as a result of the CMTask::deal_with_reference() calls
2137         // above) or we overflow.
2138         //
2139         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2140         // flag while there may still be some work to do. (See the comment at
2141         // the beginning of CMTask::do_marking_step() for those conditions -
2142         // one of which is reaching the specified time target.) It is only
2143         // when CMTask::do_marking_step() returns without setting the
2144         // has_aborted() flag that the marking step has completed.
2145         do {
2146           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2147           _task->do_marking_step(mark_step_duration_ms,
2148                                  false      /* do_termination */,
2149                                  _is_serial);
2150         } while (_task->has_aborted() && !_cm->has_overflown());
2151         _ref_counter = _ref_counter_limit;
2152       }
2153     } else {
2154       if (_cm->verbose_high()) {
2155         gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2156       }
2157     }
2158   }
2159 };
2160
2161 // 'Drain' oop closure used by both serial and parallel reference processing.
2162 // Uses the CMTask associated with a given worker thread (for serial
2163 // reference processing the CMTask for worker 0 is used). Calls the
2164 // do_marking_step routine, with an unbelievably large timeout value,
2165 // to drain the marking data structures of the remaining entries
2166 // added by the 'keep alive' oop closure above.
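// [Editorial note.] Both the 'keep alive' closure above and the 'drain'
// closure below rely on the same retry idiom, which boils down to:
//
//   do {
//     _task->do_marking_step(timeout_ms, do_termination, _is_serial);
//   } while (_task->has_aborted() && !_cm->has_overflown());
//
// i.e. keep re-entering do_marking_step() while it aborts for a
// transient reason (such as hitting its time target), but give up once
// the global mark stack has overflown - marking will have to be
// restarted in that case anyway.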
2167
2168 class G1CMDrainMarkingStackClosure: public VoidClosure {
2169   ConcurrentMark* _cm;
2170   CMTask* _task;
2171   bool _is_serial;
2172  public:
2173   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2174     _cm(cm), _task(task), _is_serial(is_serial) {
2175     assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
2176   }
2177
2178   void do_void() {
2179     do {
2180       if (_cm->verbose_high()) {
2181         gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step - serial: %s",
2182                                _task->task_id(), BOOL_TO_STR(_is_serial));
2183       }
2184
2185       // We call CMTask::do_marking_step() to completely drain the local
2186       // and global marking stacks of entries pushed by the 'keep alive'
2187       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2188       //
2189       // CMTask::do_marking_step() is called in a loop, which we'll exit
2190       // if there's nothing more to do (i.e. we've completely drained the
2191       // entries that were pushed as a result of applying the 'keep alive'
2192       // closure to the entries on the discovered ref lists) or we overflow
2193       // the global marking stack.
2194       //
2195       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2196       // flag while there may still be some work to do. (See the comment at
2197       // the beginning of CMTask::do_marking_step() for those conditions -
2198       // one of which is reaching the specified time target.) It is only
2199       // when CMTask::do_marking_step() returns without setting the
2200       // has_aborted() flag that the marking step has completed.
2201
2202       _task->do_marking_step(1000000000.0 /* something very large */,
2203                              true         /* do_termination */,
2204                              _is_serial);
2205     } while (_task->has_aborted() && !_cm->has_overflown());
2206   }
2207 };
2208
2209 // Implementation of AbstractRefProcTaskExecutor for parallel
2210 // reference processing at the end of G1 concurrent marking
2211
2212 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2213  private:
2214   G1CollectedHeap* _g1h;
2215   ConcurrentMark*  _cm;
2216   WorkGang*        _workers;
2217   int              _active_workers;
2218
2219  public:
2220   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2221                           ConcurrentMark* cm,
2222                           WorkGang* workers,
2223                           int n_workers) :
2224     _g1h(g1h), _cm(cm),
2225     _workers(workers), _active_workers(n_workers) { }
2226
2227   // Executes the given task using concurrent marking worker threads.
2228 virtual void execute(ProcessTask& task); 2229 virtual void execute(EnqueueTask& task); 2230 }; 2231 2232 class G1CMRefProcTaskProxy: public AbstractGangTask { 2233 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2234 ProcessTask& _proc_task; 2235 G1CollectedHeap* _g1h; 2236 ConcurrentMark* _cm; 2237 2238 public: 2239 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2240 G1CollectedHeap* g1h, 2241 ConcurrentMark* cm) : 2242 AbstractGangTask("Process reference objects in parallel"), 2243 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2244 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2245 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2246 } 2247 2248 virtual void work(uint worker_id) { 2249 CMTask* task = _cm->task(worker_id); 2250 G1CMIsAliveClosure g1_is_alive(_g1h); 2251 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2252 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2253 2254 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2255 } 2256 }; 2257 2258 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2259 assert(_workers != NULL, "Need parallel worker threads."); 2260 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2261 2262 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2263 2264 // We need to reset the concurrency level before each 2265 // proxy task execution, so that the termination protocol 2266 // and overflow handling in CMTask::do_marking_step() knows 2267 // how many workers to wait for. 2268 _cm->set_concurrency(_active_workers); 2269 _g1h->set_par_threads(_active_workers); 2270 _workers->run_task(&proc_task_proxy); 2271 _g1h->set_par_threads(0); 2272 } 2273 2274 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2275 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2276 EnqueueTask& _enq_task; 2277 2278 public: 2279 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2280 AbstractGangTask("Enqueue reference objects in parallel"), 2281 _enq_task(enq_task) { } 2282 2283 virtual void work(uint worker_id) { 2284 _enq_task.work(worker_id); 2285 } 2286 }; 2287 2288 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2289 assert(_workers != NULL, "Need parallel worker threads."); 2290 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2291 2292 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2293 2294 // Not strictly necessary but... 2295 // 2296 // We need to reset the concurrency level before each 2297 // proxy task execution, so that the termination protocol 2298 // and overflow handling in CMTask::do_marking_step() knows 2299 // how many workers to wait for. 2300 _cm->set_concurrency(_active_workers); 2301 _g1h->set_par_threads(_active_workers); 2302 _workers->run_task(&enq_task_proxy); 2303 _g1h->set_par_threads(0); 2304 } 2305 2306 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2307 if (has_overflown()) { 2308 // Skip processing the discovered references if we have 2309 // overflown the global marking stack. Reference objects 2310 // only get discovered once so it is OK to not 2311 // de-populate the discovered reference lists. We could have, 2312 // but the only benefit would be that, when marking restarts, 2313 // less reference objects are discovered. 2314 return; 2315 } 2316 2317 ResourceMark rm; 2318 HandleMark hm; 2319 2320 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2321 2322 // Is alive closure. 
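  // [Editorial note.] Per G1CMIsAliveClosure::do_object_b() above, an
  // object counts as alive here if it lies outside the G1 reserved heap
  // or is not "ill" with respect to the current marking - roughly, it
  // has been marked or was allocated since marking started. Referents
  // that fail this test are the ones eligible for clearing/enqueueing.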
2323   G1CMIsAliveClosure g1_is_alive(g1h);
2324
2325   // Inner scope to exclude the cleaning of the string and symbol
2326   // tables from the displayed time.
2327   {
2328     if (G1Log::finer()) {
2329       gclog_or_tty->put(' ');
2330     }
2331     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2332
2333     ReferenceProcessor* rp = g1h->ref_processor_cm();
2334
2335     // See the comment in G1CollectedHeap::ref_processing_init()
2336     // about how reference processing currently works in G1.
2337
2338     // Set the soft reference policy
2339     rp->setup_policy(clear_all_soft_refs);
2340     assert(_markStack.isEmpty(), "mark stack should be empty");
2341
2342     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2343     // in serial reference processing. Note these closures are also
2344     // used for serially processing (by the current thread) the
2345     // JNI references during parallel reference processing.
2346     //
2347     // These closures do not need to synchronize with the worker
2348     // threads involved in parallel reference processing as these
2349     // instances are executed serially by the current thread (e.g.
2350     // reference processing is not multi-threaded and is thus
2351     // performed by the current thread instead of a gang worker).
2352     //
2353     // The gang tasks involved in parallel reference processing create
2354     // their own instances of these closures, which do their own
2355     // synchronization among themselves.
2356     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2357     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2358
2359     // We need at least one active thread. If reference processing
2360     // is not multi-threaded we use the current (VMThread) thread,
2361     // otherwise we use the work gang from the G1CollectedHeap and
2362     // we utilize all the worker threads we can.
2363     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2364     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2365     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2366
2367     // Parallel processing task executor.
2368     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2369                                               g1h->workers(), active_workers);
2370     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2371
2372     ReferenceProcessorStats stats;
2373
2374     // Set the concurrency level. The phase was already set prior to
2375     // executing the remark task.
2376     set_concurrency(active_workers);
2377
2378     // Set the degree of MT processing here. If the discovery was done MT,
2379     // the number of threads involved during discovery could differ from
2380     // the number of active workers. This is OK as long as the discovered
2381     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2382     rp->set_active_mt_degree(active_workers);
2383
2384     // Process the weak references.
2385     stats = rp->process_discovered_references(&g1_is_alive,
2386                                               &g1_keep_alive,
2387                                               &g1_drain_mark_stack,
2388                                               executor,
2389                                               g1h->gc_timer_cm());
2390
2391     // The do_oop work routines of the keep_alive and drain_marking_stack
2392     // oop closures will set the has_overflown flag if we overflow the
2393     // global marking stack.
2394
2395     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2396
2397     assert(_markStack.overflow() || _markStack.isEmpty(),
2398            "mark stack should be empty (unless it overflowed)");
2399
2400     if (_markStack.overflow()) {
2401       // This should have been done already when we tried to push an
2402       // entry on to the global mark stack. But let's do it again.
2403       set_has_overflown();
2404     }
2405
2406     assert(rp->num_q() == active_workers, "why not");
2407
2408     rp->enqueue_discovered_references(executor);
2409
2410     rp->verify_no_references_recorded();
2411     assert(!rp->discovery_enabled(), "Post condition");
2412   }
2413
2414   // Now clean up stale oops in StringTable
2415   StringTable::unlink(&g1_is_alive);
2416   // Clean up unreferenced symbols in symbol table.
2417   SymbolTable::unlink();
2418 }
2419
2420 void ConcurrentMark::swapMarkBitMaps() {
2421   CMBitMapRO* temp = _prevMarkBitMap;
2422   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2423   _nextMarkBitMap  = (CMBitMap*)  temp;
2424 }
2425
2426 class CMRemarkTask: public AbstractGangTask {
2427  private:
2428   ConcurrentMark* _cm;
2429   bool            _is_serial;
2430  public:
2431   void work(uint worker_id) {
2432     // Since all available tasks are actually started, we should
2433     // only proceed if we're supposed to be active.
2434     if (worker_id < _cm->active_tasks()) {
2435       CMTask* task = _cm->task(worker_id);
2436       task->record_start_time();
2437       do {
2438         task->do_marking_step(1000000000.0 /* something very large */,
2439                               true         /* do_termination       */,
2440                               _is_serial);
2441       } while (task->has_aborted() && !_cm->has_overflown());
2442       // If we overflow, then we do not want to restart. We instead
2443       // want to abort remark and do concurrent marking again.
2444       task->record_end_time();
2445     }
2446   }
2447
2448   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2449     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2450     _cm->terminator()->reset_for_reuse(active_workers);
2451   }
2452 };
2453
2454 void ConcurrentMark::checkpointRootsFinalWork() {
2455   ResourceMark rm;
2456   HandleMark   hm;
2457   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2458
2459   g1h->ensure_parsability(false);
2460
2461   if (G1CollectedHeap::use_parallel_gc_threads()) {
2462     G1CollectedHeap::StrongRootsScope srs(g1h);
2463     // this is remark, so we'll use up all active threads
2464     uint active_workers = g1h->workers()->active_workers();
2465     if (active_workers == 0) {
2466       assert(active_workers > 0, "Should have been set earlier");
2467       active_workers = (uint) ParallelGCThreads;
2468       g1h->workers()->set_active_workers(active_workers);
2469     }
2470     set_concurrency_and_phase(active_workers, false /* concurrent */);
2471     // Leave _parallel_marking_threads at its
2472     // value originally calculated in the ConcurrentMark
2473     // constructor and pass values of the active workers
2474     // through the gang in the task.
2475
2476     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2477     // We will start all available threads, even if we decide that the
2478     // active_workers will be fewer. The extra ones will just bail out
2479     // immediately.
2480     g1h->set_par_threads(active_workers);
2481     g1h->workers()->run_task(&remarkTask);
2482     g1h->set_par_threads(0);
2483   } else {
2484     G1CollectedHeap::StrongRootsScope srs(g1h);
2485     uint active_workers = 1;
2486     set_concurrency_and_phase(active_workers, false /* concurrent */);
2487
2488     // Note - if there's no work gang then the VMThread will be
2489     // the thread to execute the remark - serially.
We have 2490 // to pass true for the is_serial parameter so that 2491 // CMTask::do_marking_step() doesn't enter the sync 2492 // barriers in the event of an overflow. Doing so will 2493 // cause an assert that the current thread is not a 2494 // concurrent GC thread. 2495 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2496 remarkTask.work(0); 2497 } 2498 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2499 guarantee(has_overflown() || 2500 satb_mq_set.completed_buffers_num() == 0, 2501 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2502 BOOL_TO_STR(has_overflown()), 2503 satb_mq_set.completed_buffers_num())); 2504 2505 print_stats(); 2506 2507 #if VERIFY_OBJS_PROCESSED 2508 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2509 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2510 _scan_obj_cl.objs_processed, 2511 ThreadLocalObjQueue::objs_enqueued); 2512 guarantee(_scan_obj_cl.objs_processed == 2513 ThreadLocalObjQueue::objs_enqueued, 2514 "Different number of objs processed and enqueued."); 2515 } 2516 #endif 2517 } 2518 2519 #ifndef PRODUCT 2520 2521 class PrintReachableOopClosure: public OopClosure { 2522 private: 2523 G1CollectedHeap* _g1h; 2524 outputStream* _out; 2525 VerifyOption _vo; 2526 bool _all; 2527 2528 public: 2529 PrintReachableOopClosure(outputStream* out, 2530 VerifyOption vo, 2531 bool all) : 2532 _g1h(G1CollectedHeap::heap()), 2533 _out(out), _vo(vo), _all(all) { } 2534 2535 void do_oop(narrowOop* p) { do_oop_work(p); } 2536 void do_oop( oop* p) { do_oop_work(p); } 2537 2538 template <class T> void do_oop_work(T* p) { 2539 oop obj = oopDesc::load_decode_heap_oop(p); 2540 const char* str = NULL; 2541 const char* str2 = ""; 2542 2543 if (obj == NULL) { 2544 str = ""; 2545 } else if (!_g1h->is_in_g1_reserved(obj)) { 2546 str = " O"; 2547 } else { 2548 HeapRegion* hr = _g1h->heap_region_containing(obj); 2549 guarantee(hr != NULL, "invariant"); 2550 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2551 bool marked = _g1h->is_marked(obj, _vo); 2552 2553 if (over_tams) { 2554 str = " >"; 2555 if (marked) { 2556 str2 = " AND MARKED"; 2557 } 2558 } else if (marked) { 2559 str = " M"; 2560 } else { 2561 str = " NOT"; 2562 } 2563 } 2564 2565 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2566 p, (void*) obj, str, str2); 2567 } 2568 }; 2569 2570 class PrintReachableObjectClosure : public ObjectClosure { 2571 private: 2572 G1CollectedHeap* _g1h; 2573 outputStream* _out; 2574 VerifyOption _vo; 2575 bool _all; 2576 HeapRegion* _hr; 2577 2578 public: 2579 PrintReachableObjectClosure(outputStream* out, 2580 VerifyOption vo, 2581 bool all, 2582 HeapRegion* hr) : 2583 _g1h(G1CollectedHeap::heap()), 2584 _out(out), _vo(vo), _all(all), _hr(hr) { } 2585 2586 void do_object(oop o) { 2587 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2588 bool marked = _g1h->is_marked(o, _vo); 2589 bool print_it = _all || over_tams || marked; 2590 2591 if (print_it) { 2592 _out->print_cr(" "PTR_FORMAT"%s", 2593 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2594 PrintReachableOopClosure oopCl(_out, _vo, _all); 2595 o->oop_iterate(&oopCl); 2596 } 2597 } 2598 }; 2599 2600 class PrintReachableRegionClosure : public HeapRegionClosure { 2601 private: 2602 G1CollectedHeap* _g1h; 2603 outputStream* _out; 2604 VerifyOption _vo; 2605 bool _all; 2606 2607 public: 2608 bool doHeapRegion(HeapRegion* hr) { 2609 HeapWord* b = hr->bottom(); 2610 HeapWord* e = hr->end(); 2611 HeapWord* t = hr->top(); 2612 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2613 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2614 "TAMS: "PTR_FORMAT, b, e, t, p); 2615 _out->cr(); 2616 2617 HeapWord* from = b; 2618 HeapWord* to = t; 2619 2620 if (to > from) { 2621 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2622 _out->cr(); 2623 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2624 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2625 _out->cr(); 2626 } 2627 2628 return false; 2629 } 2630 2631 PrintReachableRegionClosure(outputStream* out, 2632 VerifyOption vo, 2633 bool all) : 2634 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2635 }; 2636 2637 void ConcurrentMark::print_reachable(const char* str, 2638 VerifyOption vo, 2639 bool all) { 2640 gclog_or_tty->cr(); 2641 gclog_or_tty->print_cr("== Doing heap dump... "); 2642 2643 if (G1PrintReachableBaseFile == NULL) { 2644 gclog_or_tty->print_cr(" #### error: no base file defined"); 2645 return; 2646 } 2647 2648 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2649 (JVM_MAXPATHLEN - 1)) { 2650 gclog_or_tty->print_cr(" #### error: file name too long"); 2651 return; 2652 } 2653 2654 char file_name[JVM_MAXPATHLEN]; 2655 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2656 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2657 2658 fileStream fout(file_name); 2659 if (!fout.is_open()) { 2660 gclog_or_tty->print_cr(" #### error: could not open file"); 2661 return; 2662 } 2663 2664 outputStream* out = &fout; 2665 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2666 out->cr(); 2667 2668 out->print_cr("--- ITERATING OVER REGIONS"); 2669 out->cr(); 2670 PrintReachableRegionClosure rcl(out, vo, all); 2671 _g1h->heap_region_iterate(&rcl); 2672 out->cr(); 2673 2674 gclog_or_tty->print_cr(" done"); 2675 gclog_or_tty->flush(); 2676 } 2677 2678 #endif // PRODUCT 2679 2680 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2681 // Note we are overriding the read-only view of the prev map here, via 2682 // the cast. 2683 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2684 } 2685 2686 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2687 _nextMarkBitMap->clearRange(mr); 2688 } 2689 2690 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2691 clearRangePrevBitmap(mr); 2692 clearRangeNextBitmap(mr); 2693 } 2694 2695 HeapRegion* 2696 ConcurrentMark::claim_region(int task_num) { 2697 // "checkpoint" the finger 2698 HeapWord* finger = _finger; 2699 2700 // _heap_end will not change underneath our feet; it only changes at 2701 // yield points. 2702 while (finger < _heap_end) { 2703 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2704 2705 // Note on how this code handles humongous regions. In the 2706 // normal case the finger will reach the start of a "starts 2707 // humongous" (SH) region. Its end will either be the end of the 2708 // last "continues humongous" (CH) region in the sequence, or the 2709 // standard end of the SH region (if the SH is the only region in 2710 // the sequence). 
That way claim_region() will skip over the CH
2711     // regions. However, there is a subtle race between a CM thread
2712     // executing this method and a mutator thread doing a humongous
2713     // object allocation. The two are not mutually exclusive as the CM
2714     // thread does not need to hold the Heap_lock when it gets
2715     // here. So there is a chance that claim_region() will come across
2716     // a free region that's in the process of becoming a SH or a CH
2717     // region. In the former case, it will either
2718     //   a) Miss the update to the region's end, in which case it will
2719     //      visit every subsequent CH region, will find their bitmaps
2720     //      empty, and do nothing, or
2721     //   b) Will observe the update of the region's end (in which case
2722     //      it will skip the subsequent CH regions).
2723     // If it comes across a region that suddenly becomes CH, the
2724     // scenario will be similar to b). So, the race between
2725     // claim_region() and a humongous object allocation might force us
2726     // to do a bit of unnecessary work (due to some unnecessary bitmap
2727     // iterations) but it should not introduce any correctness issues.
2728     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2729     HeapWord*   bottom      = curr_region->bottom();
2730     HeapWord*   end         = curr_region->end();
2731     HeapWord*   limit       = curr_region->next_top_at_mark_start();
2732
2733     if (verbose_low()) {
2734       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2735                              "["PTR_FORMAT", "PTR_FORMAT"), "
2736                              "limit = "PTR_FORMAT,
2737                              task_num, curr_region, bottom, end, limit);
2738     }
2739
2740     // Is the gap between reading the finger and doing the CAS too long?
2741     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2742     if (res == finger) {
2743       // we succeeded
2744
2745       // notice that _finger == end cannot be guaranteed here since
2746       // someone else might have moved the finger even further
2747       assert(_finger >= end, "the finger should have moved forward");
2748
2749       if (verbose_low()) {
2750         gclog_or_tty->print_cr("[%d] we were successful with region = "
2751                                PTR_FORMAT, task_num, curr_region);
2752       }
2753
2754       if (limit > bottom) {
2755         if (verbose_low()) {
2756           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2757                                  "returning it ", task_num, curr_region);
2758         }
2759         return curr_region;
2760       } else {
2761         assert(limit == bottom,
2762                "the region limit should be at bottom");
2763         if (verbose_low()) {
2764           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2765                                  "returning NULL", task_num, curr_region);
2766         }
2767         // we return NULL and the caller should try calling
2768         // claim_region() again.
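        // [Editorial note - illustrative caller pattern, not a quote of
        // the actual CMTask code.] A marking task would typically retry
        // along these lines:
        //
        //   HeapRegion* hr = _cm->claim_region(task_id);
        //   while (hr == NULL && _cm->finger() < heap_end) {
        //     hr = _cm->claim_region(task_id); // empty region or lost CAS
        //   }
        //
        // so a NULL result is not an error; it just means "nothing was
        // claimed this time around".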
2769 return NULL; 2770 } 2771 } else { 2772 assert(_finger > finger, "the finger should have moved forward"); 2773 if (verbose_low()) { 2774 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2775 "global finger = "PTR_FORMAT", " 2776 "our finger = "PTR_FORMAT, 2777 task_num, _finger, finger); 2778 } 2779 2780 // read it again 2781 finger = _finger; 2782 } 2783 } 2784 2785 return NULL; 2786 } 2787 2788 #ifndef PRODUCT 2789 enum VerifyNoCSetOopsPhase { 2790 VerifyNoCSetOopsStack, 2791 VerifyNoCSetOopsQueues, 2792 VerifyNoCSetOopsSATBCompleted, 2793 VerifyNoCSetOopsSATBThread 2794 }; 2795 2796 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2797 private: 2798 G1CollectedHeap* _g1h; 2799 VerifyNoCSetOopsPhase _phase; 2800 int _info; 2801 2802 const char* phase_str() { 2803 switch (_phase) { 2804 case VerifyNoCSetOopsStack: return "Stack"; 2805 case VerifyNoCSetOopsQueues: return "Queue"; 2806 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2807 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2808 default: ShouldNotReachHere(); 2809 } 2810 return NULL; 2811 } 2812 2813 void do_object_work(oop obj) { 2814 guarantee(!_g1h->obj_in_cs(obj), 2815 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2816 (void*) obj, phase_str(), _info)); 2817 } 2818 2819 public: 2820 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2821 2822 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2823 _phase = phase; 2824 _info = info; 2825 } 2826 2827 virtual void do_oop(oop* p) { 2828 oop obj = oopDesc::load_decode_heap_oop(p); 2829 do_object_work(obj); 2830 } 2831 2832 virtual void do_oop(narrowOop* p) { 2833 // We should not come across narrow oops while scanning marking 2834 // stacks and SATB buffers. 2835 ShouldNotReachHere(); 2836 } 2837 2838 virtual void do_object(oop obj) { 2839 do_object_work(obj); 2840 } 2841 }; 2842 2843 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2844 bool verify_enqueued_buffers, 2845 bool verify_thread_buffers, 2846 bool verify_fingers) { 2847 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2848 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2849 return; 2850 } 2851 2852 VerifyNoCSetOopsClosure cl; 2853 2854 if (verify_stacks) { 2855 // Verify entries on the global mark stack 2856 cl.set_phase(VerifyNoCSetOopsStack); 2857 _markStack.oops_do(&cl); 2858 2859 // Verify entries on the task queues 2860 for (int i = 0; i < (int) _max_task_num; i += 1) { 2861 cl.set_phase(VerifyNoCSetOopsQueues, i); 2862 OopTaskQueue* queue = _task_queues->queue(i); 2863 queue->oops_do(&cl); 2864 } 2865 } 2866 2867 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2868 2869 // Verify entries on the enqueued SATB buffers 2870 if (verify_enqueued_buffers) { 2871 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2872 satb_qs.iterate_completed_buffers_read_only(&cl); 2873 } 2874 2875 // Verify entries on the per-thread SATB buffers 2876 if (verify_thread_buffers) { 2877 cl.set_phase(VerifyNoCSetOopsSATBThread); 2878 satb_qs.iterate_thread_buffers_read_only(&cl); 2879 } 2880 2881 if (verify_fingers) { 2882 // Verify the global finger 2883 HeapWord* global_finger = finger(); 2884 if (global_finger != NULL && global_finger < _heap_end) { 2885 // The global finger always points to a heap region boundary. 
We 2886 // use heap_region_containing_raw() to get the containing region 2887 // given that the global finger could be pointing to a free region 2888 // which subsequently becomes continues humongous. If that 2889 // happens, heap_region_containing() will return the bottom of the 2890 // corresponding starts humongous region and the check below will 2891 // not hold any more. 2892 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2893 guarantee(global_finger == global_hr->bottom(), 2894 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2895 global_finger, HR_FORMAT_PARAMS(global_hr))); 2896 } 2897 2898 // Verify the task fingers 2899 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2900 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2901 CMTask* task = _tasks[i]; 2902 HeapWord* task_finger = task->finger(); 2903 if (task_finger != NULL && task_finger < _heap_end) { 2904 // See above note on the global finger verification. 2905 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2906 guarantee(task_finger == task_hr->bottom() || 2907 !task_hr->in_collection_set(), 2908 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2909 task_finger, HR_FORMAT_PARAMS(task_hr))); 2910 } 2911 } 2912 } 2913 } 2914 #endif // PRODUCT 2915 2916 // Aggregate the counting data that was constructed concurrently 2917 // with marking. 2918 class AggregateCountDataHRClosure: public HeapRegionClosure { 2919 G1CollectedHeap* _g1h; 2920 ConcurrentMark* _cm; 2921 CardTableModRefBS* _ct_bs; 2922 BitMap* _cm_card_bm; 2923 size_t _max_task_num; 2924 2925 public: 2926 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2927 BitMap* cm_card_bm, 2928 size_t max_task_num) : 2929 _g1h(g1h), _cm(g1h->concurrent_mark()), 2930 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 2931 _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { } 2932 2933 bool doHeapRegion(HeapRegion* hr) { 2934 if (hr->continuesHumongous()) { 2935 // We will ignore these here and process them when their 2936 // associated "starts humongous" region is processed. 2937 // Note that we cannot rely on their associated 2938 // "starts humongous" region to have their bit set to 1 2939 // since, due to the region chunking in the parallel region 2940 // iteration, a "continues humongous" region might be visited 2941 // before its associated "starts humongous". 2942 return false; 2943 } 2944 2945 HeapWord* start = hr->bottom(); 2946 HeapWord* limit = hr->next_top_at_mark_start(); 2947 HeapWord* end = hr->end(); 2948 2949 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2950 err_msg("Preconditions not met - " 2951 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2952 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2953 start, limit, hr->top(), hr->end())); 2954 2955 assert(hr->next_marked_bytes() == 0, "Precondition"); 2956 2957 if (start == limit) { 2958 // NTAMS of this region has not been set so nothing to do. 2959 return false; 2960 } 2961 2962 // 'start' should be in the heap. 
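    // [Editorial note.] The alignment asserts below rely on heap regions
    // being card aligned: the region size is a power of two and a
    // multiple of the 512-byte card size, so hr->bottom() - and
    // hr->end(), while it is still inside the reserved heap - always
    // fall on card boundaries. Only 'limit' (ntams) can land mid-card,
    // which is why limit_idx alone may need the +1 adjustment below.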
2963     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2964     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2965     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2966
2967     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2968     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2969     BitMap::idx_t end_idx   = _cm->card_bitmap_index_for(end);
2970
2971     // If ntams is not card aligned then we bump the card bitmap index
2972     // for limit so that we get all the cards spanned by
2973     // the object ending at ntams.
2974     // Note: if this is the last region in the heap then ntams
2975     // could be actually just beyond the end of the heap;
2976     // limit_idx will then correspond to a (non-existent) card
2977     // that is also outside the heap.
2978     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2979       limit_idx += 1;
2980     }
2981
2982     assert(limit_idx <= end_idx, "or else use atomics");
2983
2984     // Aggregate the "stripe" in the count data associated with hr.
2985     uint hrs_index = hr->hrs_index();
2986     size_t marked_bytes = 0;
2987
2988     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2989       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2990       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2991
2992       // Fetch the marked_bytes in this region for task i and
2993       // add it to the running total for this region.
2994       marked_bytes += marked_bytes_array[hrs_index];
2995
2996       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2997       // into the global card bitmap.
2998       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2999
3000       while (scan_idx < limit_idx) {
3001         assert(task_card_bm->at(scan_idx) == true, "should be");
3002         _cm_card_bm->set_bit(scan_idx);
3003         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3004
3005         // BitMap::get_next_one_offset() can handle the case when
3006         // its left_offset parameter is greater than its right_offset
3007         // parameter. It does, however, have an early exit if
3008         // left_offset == right_offset. So let's limit the value
3009         // passed in for left offset here.
3010         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3011         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3012       }
3013     }
3014
3015     // Update the marked bytes for this region.
3016 hr->add_to_marked_bytes(marked_bytes); 3017 3018 // Next heap region 3019 return false; 3020 } 3021 }; 3022 3023 class G1AggregateCountDataTask: public AbstractGangTask { 3024 protected: 3025 G1CollectedHeap* _g1h; 3026 ConcurrentMark* _cm; 3027 BitMap* _cm_card_bm; 3028 size_t _max_task_num; 3029 int _active_workers; 3030 3031 public: 3032 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3033 ConcurrentMark* cm, 3034 BitMap* cm_card_bm, 3035 size_t max_task_num, 3036 int n_workers) : 3037 AbstractGangTask("Count Aggregation"), 3038 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3039 _max_task_num(max_task_num), 3040 _active_workers(n_workers) { } 3041 3042 void work(uint worker_id) { 3043 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num); 3044 3045 if (G1CollectedHeap::use_parallel_gc_threads()) { 3046 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3047 _active_workers, 3048 HeapRegion::AggregateCountClaimValue); 3049 } else { 3050 _g1h->heap_region_iterate(&cl); 3051 } 3052 } 3053 }; 3054 3055 3056 void ConcurrentMark::aggregate_count_data() { 3057 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3058 _g1h->workers()->active_workers() : 3059 1); 3060 3061 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3062 _max_task_num, n_workers); 3063 3064 if (G1CollectedHeap::use_parallel_gc_threads()) { 3065 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3066 "sanity check"); 3067 _g1h->set_par_threads(n_workers); 3068 _g1h->workers()->run_task(&g1_par_agg_task); 3069 _g1h->set_par_threads(0); 3070 3071 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3072 "sanity check"); 3073 _g1h->reset_heap_region_claim_values(); 3074 } else { 3075 g1_par_agg_task.work(0); 3076 } 3077 } 3078 3079 // Clear the per-worker arrays used to store the per-region counting data 3080 void ConcurrentMark::clear_all_count_data() { 3081 // Clear the global card bitmap - it will be filled during 3082 // liveness count aggregation (during remark) and the 3083 // final counting task. 3084 _card_bm.clear(); 3085 3086 // Clear the global region bitmap - it will be filled as part 3087 // of the final counting task. 
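  // [Editorial note.] The counting data being cleared here is, per
  // marking task, a pair of (card bitmap, marked-bytes-per-region
  // array); the loop below zeroes one such pair for each of the
  // _max_task_num tasks, while the two global bitmaps (_card_bm and
  // _region_bm) are only refilled during aggregation and the final
  // counting task.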
3088   _region_bm.clear();
3089
3090   uint max_regions = _g1h->max_regions();
3091   assert(_max_task_num != 0, "uninitialized");
3092
3093   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3094     BitMap* task_card_bm = count_card_bitmap_for(i);
3095     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3096
3097     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3098     assert(marked_bytes_array != NULL, "uninitialized");
3099
3100     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3101     task_card_bm->clear();
3102   }
3103 }
3104
3105 void ConcurrentMark::print_stats() {
3106   if (verbose_stats()) {
3107     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3108     for (size_t i = 0; i < _active_tasks; ++i) {
3109       _tasks[i]->print_stats();
3110       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3111     }
3112   }
3113 }
3114
3115 // abandon current marking iteration due to a Full GC
3116 void ConcurrentMark::abort() {
3117   // Clear all marks to force marking thread to do nothing
3118   _nextMarkBitMap->clearAll();
3119   // Clear the liveness counting data
3120   clear_all_count_data();
3121   // Empty mark stack
3122   reset_marking_state();
3123   for (int i = 0; i < (int)_max_task_num; ++i) {
3124     _tasks[i]->clear_region_fields();
3125   }
3126   _has_aborted = true;
3127
3128   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3129   satb_mq_set.abandon_partial_marking();
3130   // This can be called either during or outside marking; we'll read
3131   // the expected_active value from the SATB queue set.
3132   satb_mq_set.set_active_all_threads(
3133                                  false, /* new active value */
3134                                  satb_mq_set.is_active() /* expected_active */);
3135
3136   _g1h->trace_heap_after_concurrent_cycle();
3137   _g1h->register_concurrent_cycle_end();
3138 }
3139
3140 static void print_ms_time_info(const char* prefix, const char* name,
3141                                NumberSeq& ns) {
3142   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3143                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3144   if (ns.num() > 0) {
3145     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3146                            prefix, ns.sd(), ns.maximum());
3147   }
3148 }
3149
3150 void ConcurrentMark::print_summary_info() {
3151   gclog_or_tty->print_cr(" Concurrent marking:");
3152   print_ms_time_info("  ", "init marks", _init_times);
3153   print_ms_time_info("  ", "remarks", _remark_times);
3154   {
3155     print_ms_time_info("     ", "final marks", _remark_mark_times);
3156     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3157
3158   }
3159   print_ms_time_info("  ", "cleanups", _cleanup_times);
3160   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3161                          _total_counting_time,
3162                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3163                           (double)_cleanup_times.num()
3164                          : 0.0));
3165   if (G1ScrubRemSets) {
3166     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3167                            _total_rs_scrub_time,
3168                            (_cleanup_times.num() > 0 ?
_total_rs_scrub_time * 1000.0 / 3169 (double)_cleanup_times.num() 3170 : 0.0)); 3171 } 3172 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3173 (_init_times.sum() + _remark_times.sum() + 3174 _cleanup_times.sum())/1000.0); 3175 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3176 "(%8.2f s marking).", 3177 cmThread()->vtime_accum(), 3178 cmThread()->vtime_mark_accum()); 3179 } 3180 3181 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3182 if (use_parallel_marking_threads()) { 3183 _parallel_workers->print_worker_threads_on(st); 3184 } 3185 } 3186 3187 // We take a break if someone is trying to stop the world. 3188 bool ConcurrentMark::do_yield_check(uint worker_id) { 3189 if (should_yield()) { 3190 if (worker_id == 0) { 3191 _g1h->g1_policy()->record_concurrent_pause(); 3192 } 3193 cmThread()->yield(); 3194 return true; 3195 } else { 3196 return false; 3197 } 3198 } 3199 3200 bool ConcurrentMark::should_yield() { 3201 return cmThread()->should_yield(); 3202 } 3203 3204 bool ConcurrentMark::containing_card_is_marked(void* p) { 3205 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3206 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3207 } 3208 3209 bool ConcurrentMark::containing_cards_are_marked(void* start, 3210 void* last) { 3211 return containing_card_is_marked(start) && 3212 containing_card_is_marked(last); 3213 } 3214 3215 #ifndef PRODUCT 3216 // for debugging purposes 3217 void ConcurrentMark::print_finger() { 3218 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3219 _heap_start, _heap_end, _finger); 3220 for (int i = 0; i < (int) _max_task_num; ++i) { 3221 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3222 } 3223 gclog_or_tty->print_cr(""); 3224 } 3225 #endif 3226 3227 void CMTask::scan_object(oop obj) { 3228 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3229 3230 if (_cm->verbose_high()) { 3231 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3232 _task_id, (void*) obj); 3233 } 3234 3235 size_t obj_size = obj->size(); 3236 _words_scanned += obj_size; 3237 3238 obj->oop_iterate(_cm_oop_closure); 3239 statsOnly( ++_objs_scanned ); 3240 check_limits(); 3241 } 3242 3243 // Closure for iteration over bitmaps 3244 class CMBitMapClosure : public BitMapClosure { 3245 private: 3246 // the bitmap that is being iterated over 3247 CMBitMap* _nextMarkBitMap; 3248 ConcurrentMark* _cm; 3249 CMTask* _task; 3250 3251 public: 3252 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3253 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3254 3255 bool do_bit(size_t offset) { 3256 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3257 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3258 assert( addr < _cm->finger(), "invariant"); 3259 3260 statsOnly( _task->increase_objs_found_on_bitmap() ); 3261 assert(addr >= _task->finger(), "invariant"); 3262 3263 // We move that task's local finger along. 3264 _task->move_finger_to(addr); 3265 3266 _task->scan_object(oop(addr)); 3267 // we only partially drain the local queue and global stack 3268 _task->drain_local_queue(true); 3269 _task->drain_global_stack(true); 3270 3271 // if the has_aborted flag has been raised, we need to bail out of 3272 // the iteration 3273 return !_task->has_aborted(); 3274 } 3275 }; 3276 3277 // Closure for iterating over objects, currently only used for 3278 // processing SATB buffers. 
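// [Editorial note.] SATB = snapshot-at-the-beginning. The SATB buffers
// hold the previous values of reference fields that mutators overwrote
// while concurrent marking was running, as recorded by the pre-write
// barrier. Handing each recorded object to CMTask::deal_with_reference()
// below keeps the marking snapshot consistent with the object graph as
// it existed when marking started.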
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out NULL regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
                           _task_id, hr);
  }

  _curr_region = hr;
  _finger      = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr   = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit  = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // so in fact we do not need to scan anything else. We simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  }
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
3369 _curr_region = NULL; 3370 _finger = NULL; 3371 _region_limit = NULL; 3372 } 3373 3374 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3375 if (cm_oop_closure == NULL) { 3376 assert(_cm_oop_closure != NULL, "invariant"); 3377 } else { 3378 assert(_cm_oop_closure == NULL, "invariant"); 3379 } 3380 _cm_oop_closure = cm_oop_closure; 3381 } 3382 3383 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3384 guarantee(nextMarkBitMap != NULL, "invariant"); 3385 3386 if (_cm->verbose_low()) { 3387 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3388 } 3389 3390 _nextMarkBitMap = nextMarkBitMap; 3391 clear_region_fields(); 3392 3393 _calls = 0; 3394 _elapsed_time_ms = 0.0; 3395 _termination_time_ms = 0.0; 3396 _termination_start_time_ms = 0.0; 3397 3398 #if _MARKING_STATS_ 3399 _local_pushes = 0; 3400 _local_pops = 0; 3401 _local_max_size = 0; 3402 _objs_scanned = 0; 3403 _global_pushes = 0; 3404 _global_pops = 0; 3405 _global_max_size = 0; 3406 _global_transfers_to = 0; 3407 _global_transfers_from = 0; 3408 _regions_claimed = 0; 3409 _objs_found_on_bitmap = 0; 3410 _satb_buffers_processed = 0; 3411 _steal_attempts = 0; 3412 _steals = 0; 3413 _aborted = 0; 3414 _aborted_overflow = 0; 3415 _aborted_cm_aborted = 0; 3416 _aborted_yield = 0; 3417 _aborted_timed_out = 0; 3418 _aborted_satb = 0; 3419 _aborted_termination = 0; 3420 #endif // _MARKING_STATS_ 3421 } 3422 3423 bool CMTask::should_exit_termination() { 3424 regular_clock_call(); 3425 // This is called when we are in the termination protocol. We should 3426 // quit if, for some reason, this task wants to abort or the global 3427 // stack is not empty (this means that we can get work from it). 3428 return !_cm->mark_stack_empty() || has_aborted(); 3429 } 3430 3431 void CMTask::reached_limit() { 3432 assert(_words_scanned >= _words_scanned_limit || 3433 _refs_reached >= _refs_reached_limit , 3434 "shouldn't have been called otherwise"); 3435 regular_clock_call(); 3436 } 3437 3438 void CMTask::regular_clock_call() { 3439 if (has_aborted()) return; 3440 3441 // First, we need to recalculate the words scanned and refs reached 3442 // limits for the next clock call. 3443 recalculate_limits(); 3444 3445 // During the regular clock call we do the following 3446 3447 // (1) If an overflow has been flagged, then we abort. 3448 if (_cm->has_overflown()) { 3449 set_has_aborted(); 3450 return; 3451 } 3452 3453 // If we are not concurrent (i.e. we're doing remark) we don't need 3454 // to check anything else. The other steps are only needed during 3455 // the concurrent marking phase. 3456 if (!concurrent()) return; 3457 3458 // (2) If marking has been aborted for Full GC, then we also abort. 3459 if (_cm->has_aborted()) { 3460 set_has_aborted(); 3461 statsOnly( ++_aborted_cm_aborted ); 3462 return; 3463 } 3464 3465 double curr_time_ms = os::elapsedVTime() * 1000.0; 3466 3467 // (3) If marking stats are enabled, then we update the step history. 
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = "SIZE_FORMAT"%s, "
                           "refs reached = "SIZE_FORMAT"%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    }
    // We do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so.
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
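  // Worked example (illustrative numbers only, not the tuned
  // constants): if words_scanned_period were 12*1024 words and the
  // last recalculate_limits() had set _real_words_scanned_limit to
  // 100*1024, the assignment below would lower _words_scanned_limit
  // to 100*1024 - 9*1024 = 91*1024, so the next regular_clock_call()
  // fires after at most a quarter of the usual scanning period.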
  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // We popped at least one entry from the local queue.

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      }
      set_has_aborted();
    } else {
      // The transfer was successful.

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // Yes, we actually popped at least one entry.

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
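  // For instance (illustrative values, not necessarily the shipped
  // defaults): with _task_queue->max_elems() == 16384 and
  // GCDrainStackTargetSize == 64, a partial drain stops once the
  // queue is down to MIN2(16384 / 3, 64) = 64 entries, leaving some
  // entries behind for other tasks to steal; a total drain empties
  // the queue completely (target size 0).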
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] draining local queue, target size = "SIZE_FORMAT,
                             _task_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining global stack, target size "SIZE_FORMAT,
                             _task_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained global stack, size = "SIZE_FORMAT,
                             _task_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue set has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
3736 if (G1CollectedHeap::use_parallel_gc_threads()) { 3737 while (!has_aborted() && 3738 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3739 if (_cm->verbose_medium()) { 3740 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3741 } 3742 statsOnly( ++_satb_buffers_processed ); 3743 regular_clock_call(); 3744 } 3745 } else { 3746 while (!has_aborted() && 3747 satb_mq_set.apply_closure_to_completed_buffer()) { 3748 if (_cm->verbose_medium()) { 3749 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3750 } 3751 statsOnly( ++_satb_buffers_processed ); 3752 regular_clock_call(); 3753 } 3754 } 3755 3756 if (!concurrent() && !has_aborted()) { 3757 // We should only do this during remark. 3758 if (G1CollectedHeap::use_parallel_gc_threads()) { 3759 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3760 } else { 3761 satb_mq_set.iterate_closure_all_threads(); 3762 } 3763 } 3764 3765 _draining_satb_buffers = false; 3766 3767 assert(has_aborted() || 3768 concurrent() || 3769 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3770 3771 if (G1CollectedHeap::use_parallel_gc_threads()) { 3772 satb_mq_set.set_par_closure(_task_id, NULL); 3773 } else { 3774 satb_mq_set.set_closure(NULL); 3775 } 3776 3777 // again, this was a potentially expensive operation, decrease the 3778 // limits to get the regular clock call early 3779 decrease_limits(); 3780 } 3781 3782 void CMTask::print_stats() { 3783 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 3784 _task_id, _calls); 3785 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3786 _elapsed_time_ms, _termination_time_ms); 3787 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3788 _step_times_ms.num(), _step_times_ms.avg(), 3789 _step_times_ms.sd()); 3790 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3791 _step_times_ms.maximum(), _step_times_ms.sum()); 3792 3793 #if _MARKING_STATS_ 3794 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3795 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3796 _all_clock_intervals_ms.sd()); 3797 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3798 _all_clock_intervals_ms.maximum(), 3799 _all_clock_intervals_ms.sum()); 3800 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3801 _clock_due_to_scanning, _clock_due_to_marking); 3802 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3803 _objs_scanned, _objs_found_on_bitmap); 3804 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3805 _local_pushes, _local_pops, _local_max_size); 3806 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3807 _global_pushes, _global_pops, _global_max_size); 3808 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3809 _global_transfers_to,_global_transfers_from); 3810 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3811 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3812 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3813 _steal_attempts, _steals); 3814 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3815 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3816 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3817 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3818 _aborted_timed_out, _aborted_satb, 
                         _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in
    parallel with other invocations of do_marking_step() on different
    tasks (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it needs to be also
      pushed on a stack).

      (2) Local Queue. Each task has a local queue, which it can
      access reasonably efficiently. Other tasks can steal from it
      when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work will a task totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and finer-grained
      interaction with it might cause contention. If it
      overflows, then the marking phase should restart and iterate
      over the bitmap to identify gray objects. Throughout the marking
      phase, tasks attempt to keep the global mark stack at a small
      length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work will
      tasks totally drain the global mark stack.

      (4) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
      (local queues, stacks, fingers etc.)
      are re-initialized so that when do_marking_step() completes,
      the marking phase can immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield then we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-millisecond intervals) throughout marking. It is this clock
    method that checks all the abort conditions which were mentioned
    above and decides when the task should abort. A work-based scheme
    is used to trigger this clock method: when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reaches a given limit. Additional
    invocations of the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid calling vtime too regularly, as it is quite expensive.
    So, once it was in place, it was natural to piggy-back all the
    other conditions on it too and not constantly check them
    throughout the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // Set up the variables that are used in the work-based scheme to
  // call the regular clock method.
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // Clear all flags.
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack.
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
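      // Pictorially, the window we are about to iterate over looks
      // like this (illustrative layout, not real addresses):
      //
      //   bottom        _finger       _region_limit (== NTAMS)    end
      //     |..............|================|........................|
      //                     \__ the only part of the region that
      //                         this call still needs to scan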
4031 MemRegion mr = MemRegion(_finger, _region_limit); 4032 4033 if (_cm->verbose_low()) { 4034 gclog_or_tty->print_cr("[%d] we're scanning part " 4035 "["PTR_FORMAT", "PTR_FORMAT") " 4036 "of region "PTR_FORMAT, 4037 _task_id, _finger, _region_limit, _curr_region); 4038 } 4039 4040 // Let's iterate over the bitmap of the part of the 4041 // region that is left. 4042 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4043 // We successfully completed iterating over the region. Now, 4044 // let's give up the region. 4045 giveup_current_region(); 4046 regular_clock_call(); 4047 } else { 4048 assert(has_aborted(), "currently the only way to do so"); 4049 // The only way to abort the bitmap iteration is to return 4050 // false from the do_bit() method. However, inside the 4051 // do_bit() method we move the _finger to point to the 4052 // object currently being looked at. So, if we bail out, we 4053 // have definitely set _finger to something non-null. 4054 assert(_finger != NULL, "invariant"); 4055 4056 // Region iteration was actually aborted. So now _finger 4057 // points to the address of the object we last scanned. If we 4058 // leave it there, when we restart this task, we will rescan 4059 // the object. It is easy to avoid this. We move the finger by 4060 // enough to point to the next possible object header (the 4061 // bitmap knows by how much we need to move it as it knows its 4062 // granularity). 4063 assert(_finger < _region_limit, "invariant"); 4064 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4065 // Check if bitmap iteration was aborted while scanning the last object 4066 if (new_finger >= _region_limit) { 4067 giveup_current_region(); 4068 } else { 4069 move_finger_to(new_finger); 4070 } 4071 } 4072 } 4073 // At this point we have either completed iterating over the 4074 // region we were holding on to, or we have aborted. 4075 4076 // We then partially drain the local queue and the global stack. 4077 // (Do we really need this?) 4078 drain_local_queue(true); 4079 drain_global_stack(true); 4080 4081 // Read the note on the claim_region() method on why it might 4082 // return NULL with potentially more regions available for 4083 // claiming and why we have to check out_of_regions() to determine 4084 // whether we're done or not. 4085 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4086 // We are going to try to claim a new region. We should have 4087 // given up on the previous one. 4088 // Separated the asserts so that we know which one fires. 4089 assert(_curr_region == NULL, "invariant"); 4090 assert(_finger == NULL, "invariant"); 4091 assert(_region_limit == NULL, "invariant"); 4092 if (_cm->verbose_low()) { 4093 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 4094 } 4095 HeapRegion* claimed_region = _cm->claim_region(_task_id); 4096 if (claimed_region != NULL) { 4097 // Yes, we managed to claim one 4098 statsOnly( ++_regions_claimed ); 4099 4100 if (_cm->verbose_low()) { 4101 gclog_or_tty->print_cr("[%d] we successfully claimed " 4102 "region "PTR_FORMAT, 4103 _task_id, claimed_region); 4104 } 4105 4106 setup_for_region(claimed_region); 4107 assert(_curr_region == claimed_region, "invariant"); 4108 } 4109 // It is important to call the regular clock here. It might take 4110 // a while to claim a region if, for example, we hit a large 4111 // block of empty regions. So we need to call the regular clock 4112 // method once round the loop to make sure it's called 4113 // frequently enough. 
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt to steal work from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
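    // In outline, the handshake below works like this (a simplified
    // sketch, not the actual ParallelTaskTerminator implementation):
    //
    //   worker:     finished = terminator->offer_termination(this);
    //   terminator: spins, periodically calling
    //               this->should_exit_termination()
    //   finished == true  -> every task has offered termination, so
    //                        the marking phase is globally done
    //   finished == false -> should_exit_termination() fired (new
    //                        work appeared or we must abort), so the
    //                        task aborts and the caller retries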
4210 bool finished = (is_serial || 4211 _cm->terminator()->offer_termination(this)); 4212 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4213 _termination_time_ms += 4214 termination_end_time_ms - _termination_start_time_ms; 4215 4216 if (finished) { 4217 // We're all done. 4218 4219 if (_task_id == 0) { 4220 // let's allow task 0 to do this 4221 if (concurrent()) { 4222 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4223 // we need to set this to false before the next 4224 // safepoint. This way we ensure that the marking phase 4225 // doesn't observe any more heap expansions. 4226 _cm->clear_concurrent_marking_in_progress(); 4227 } 4228 } 4229 4230 // We can now guarantee that the global stack is empty, since 4231 // all other tasks have finished. We separated the guarantees so 4232 // that, if a condition is false, we can immediately find out 4233 // which one. 4234 guarantee(_cm->out_of_regions(), "only way to reach here"); 4235 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4236 guarantee(_task_queue->size() == 0, "only way to reach here"); 4237 guarantee(!_cm->has_overflown(), "only way to reach here"); 4238 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4239 4240 if (_cm->verbose_low()) { 4241 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4242 } 4243 } else { 4244 // Apparently there's more work to do. Let's abort this task. It 4245 // will restart it and we can hopefully find more things to do. 4246 4247 if (_cm->verbose_low()) { 4248 gclog_or_tty->print_cr("[%d] apparently there is more work to do", 4249 _task_id); 4250 } 4251 4252 set_has_aborted(); 4253 statsOnly( ++_aborted_termination ); 4254 } 4255 } 4256 4257 // Mainly for debugging purposes to make sure that a pointer to the 4258 // closure which was statically allocated in this frame doesn't 4259 // escape it by accident. 4260 set_cm_oop_closure(NULL); 4261 double end_time_ms = os::elapsedVTime() * 1000.0; 4262 double elapsed_time_ms = end_time_ms - _start_time_ms; 4263 // Update the step history. 4264 _step_times_ms.add(elapsed_time_ms); 4265 4266 if (has_aborted()) { 4267 // The task was aborted for some reason. 4268 4269 statsOnly( ++_aborted ); 4270 4271 if (_has_timed_out) { 4272 double diff_ms = elapsed_time_ms - _time_target_ms; 4273 // Keep statistics of how well we did with respect to hitting 4274 // our target only if we actually timed out (if we aborted for 4275 // other reasons, then the results might get skewed). 4276 _marking_step_diffs_ms.add(diff_ms); 4277 } 4278 4279 if (_cm->has_overflown()) { 4280 // This is the interesting one. We aborted because a global 4281 // overflow was raised. This means we have to restart the 4282 // marking phase and start iterating over regions. However, in 4283 // order to do this we have to make sure that all tasks stop 4284 // what they are doing and re-initialise in a safe manner. We 4285 // will achieve this with the use of two barrier sync points. 4286 4287 if (_cm->verbose_low()) { 4288 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4289 } 4290 4291 if (!is_serial) { 4292 // We only need to enter the sync barrier if being called 4293 // from a parallel context 4294 _cm->enter_first_sync_barrier(_task_id); 4295 4296 // When we exit this sync barrier we know that all tasks have 4297 // stopped doing marking work. So, it's now safe to 4298 // re-initialise our data structures. At the end of this method, 4299 // task 0 will clear the global data structures. 
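      // Shape of the overflow handshake (sketch): barrier #1 means
      // "every task has stopped marking"; between the barriers each
      // task resets its local state (and task 0 the global state);
      // barrier #2 means "every task has finished resetting", after
      // which marking can safely restart.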
4300 } 4301 4302 statsOnly( ++_aborted_overflow ); 4303 4304 // We clear the local state of this task... 4305 clear_region_fields(); 4306 4307 if (!is_serial) { 4308 // ...and enter the second barrier. 4309 _cm->enter_second_sync_barrier(_task_id); 4310 } 4311 // At this point, if we're during the concurrent phase of 4312 // marking, everything has been re-initialized and we're 4313 // ready to restart. 4314 } 4315 4316 if (_cm->verbose_low()) { 4317 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4318 "elapsed = %1.2lfms <<<<<<<<<<", 4319 _task_id, _time_target_ms, elapsed_time_ms); 4320 if (_cm->has_aborted()) { 4321 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", 4322 _task_id); 4323 } 4324 } 4325 } else { 4326 if (_cm->verbose_low()) { 4327 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4328 "elapsed = %1.2lfms <<<<<<<<<<", 4329 _task_id, _time_target_ms, elapsed_time_ms); 4330 } 4331 } 4332 4333 _claimed = false; 4334 } 4335 4336 CMTask::CMTask(int task_id, 4337 ConcurrentMark* cm, 4338 size_t* marked_bytes, 4339 BitMap* card_bm, 4340 CMTaskQueue* task_queue, 4341 CMTaskQueueSet* task_queues) 4342 : _g1h(G1CollectedHeap::heap()), 4343 _task_id(task_id), _cm(cm), 4344 _claimed(false), 4345 _nextMarkBitMap(NULL), _hash_seed(17), 4346 _task_queue(task_queue), 4347 _task_queues(task_queues), 4348 _cm_oop_closure(NULL), 4349 _marked_bytes_array(marked_bytes), 4350 _card_bm(card_bm) { 4351 guarantee(task_queue != NULL, "invariant"); 4352 guarantee(task_queues != NULL, "invariant"); 4353 4354 statsOnly( _clock_due_to_scanning = 0; 4355 _clock_due_to_marking = 0 ); 4356 4357 _marking_step_diffs_ms.add(0.5); 4358 } 4359 4360 // These are formatting macros that are used below to ensure 4361 // consistent formatting. The *_H_* versions are used to format the 4362 // header for a particular value and they should be kept consistent 4363 // with the corresponding macro. Also note that most of the macros add 4364 // the necessary white space (as a prefix) which makes them a bit 4365 // easier to compose. 4366 4367 // All the output lines are prefixed with this string to be able to 4368 // identify them easily in a large log file. 
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// It takes as a parameter a pointer to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
4454 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4455 size_t* capacity_bytes, 4456 size_t* prev_live_bytes, 4457 size_t* next_live_bytes) { 4458 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4459 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4460 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4461 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4462 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4463 } 4464 4465 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4466 const char* type = ""; 4467 HeapWord* bottom = r->bottom(); 4468 HeapWord* end = r->end(); 4469 size_t capacity_bytes = r->capacity(); 4470 size_t used_bytes = r->used(); 4471 size_t prev_live_bytes = r->live_bytes(); 4472 size_t next_live_bytes = r->next_live_bytes(); 4473 double gc_eff = r->gc_efficiency(); 4474 if (r->used() == 0) { 4475 type = "FREE"; 4476 } else if (r->is_survivor()) { 4477 type = "SURV"; 4478 } else if (r->is_young()) { 4479 type = "EDEN"; 4480 } else if (r->startsHumongous()) { 4481 type = "HUMS"; 4482 4483 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4484 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4485 "they should have been zeroed after the last time we used them"); 4486 // Set up the _hum_* fields. 4487 _hum_capacity_bytes = capacity_bytes; 4488 _hum_used_bytes = used_bytes; 4489 _hum_prev_live_bytes = prev_live_bytes; 4490 _hum_next_live_bytes = next_live_bytes; 4491 get_hum_bytes(&used_bytes, &capacity_bytes, 4492 &prev_live_bytes, &next_live_bytes); 4493 end = bottom + HeapRegion::GrainWords; 4494 } else if (r->continuesHumongous()) { 4495 type = "HUMC"; 4496 get_hum_bytes(&used_bytes, &capacity_bytes, 4497 &prev_live_bytes, &next_live_bytes); 4498 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4499 } else { 4500 type = "OLD"; 4501 } 4502 4503 _total_used_bytes += used_bytes; 4504 _total_capacity_bytes += capacity_bytes; 4505 _total_prev_live_bytes += prev_live_bytes; 4506 _total_next_live_bytes += next_live_bytes; 4507 4508 // Print a line for this particular region. 4509 _out->print_cr(G1PPRL_LINE_PREFIX 4510 G1PPRL_TYPE_FORMAT 4511 G1PPRL_ADDR_BASE_FORMAT 4512 G1PPRL_BYTE_FORMAT 4513 G1PPRL_BYTE_FORMAT 4514 G1PPRL_BYTE_FORMAT 4515 G1PPRL_DOUBLE_FORMAT, 4516 type, bottom, end, 4517 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4518 4519 return false; 4520 } 4521 4522 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4523 // Print the footer of the output. 4524 _out->print_cr(G1PPRL_LINE_PREFIX); 4525 _out->print_cr(G1PPRL_LINE_PREFIX 4526 " SUMMARY" 4527 G1PPRL_SUM_MB_FORMAT("capacity") 4528 G1PPRL_SUM_MB_PERC_FORMAT("used") 4529 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4530 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4531 bytes_to_mb(_total_capacity_bytes), 4532 bytes_to_mb(_total_used_bytes), 4533 perc(_total_used_bytes, _total_capacity_bytes), 4534 bytes_to_mb(_total_prev_live_bytes), 4535 perc(_total_prev_live_bytes, _total_capacity_bytes), 4536 bytes_to_mb(_total_next_live_bytes), 4537 perc(_total_next_live_bytes, _total_capacity_bytes)); 4538 _out->cr(); 4539 }
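
// A sketch of how the closure above is intended to be driven (the
// actual call site lives elsewhere in G1 and is guarded by the
// G1PrintRegionLivenessInfo flag; the phase name here is just an
// example):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the table header, doHeapRegion() prints one
// line per region (returning false so that the iteration continues),
// and the destructor prints the summary footer.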