/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
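  // Sizing note (illustrative, not from the original sources): the backing
  // store reserved above holds one bit per 2^_shifter heap words. With
  // _shifter == 0 (what MinObjAlignment - 1 yields on typical 64-bit
  // builds), a 1 GB heap has 2^27 words and therefore needs roughly
  // 2^27 bits, i.e. about 16 MB of bit map.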
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
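// The global mark stack holds grey objects spilled from the per-task
// queues. Note the two pushing disciplines below: par_push() and
// par_adjoin_arr() claim slots with Atomic::cmpxchg so concurrent users
// need no lock, while par_push_arr() and par_pop_arr() serialize on
// ParGCRareEvent_lock instead.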
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
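  // We hold ParGCRareEvent_lock here, so a plain update of _index is
  // safe with respect to the other lock-taking users
  // (par_push_arr()/par_pop_arr()).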
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
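  // Read _next_survivor once without the lock as a fast path; we only
  // take RootRegionScan_lock (and re-read it) when there still appears
  // to be a region left to claim.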
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
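  // Illustrative arithmetic (example values assumed, not from the original
  // sources): with the usual 512-byte cards, card_shift is 9, so a heap
  // starting at 0x80000000 gives a bottom card number of
  // 0x80000000 >> 9 = 0x400000; indexes into the counting card bitmaps
  // are biased by this value.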
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
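      // scale_parallel_threads() returns MAX2((n + 2) / 4, 1U); for
      // example, 8 parallel GC threads yield (8 + 2) / 4 = 2 concurrent
      // marking threads, and small values still produce at least one.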
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_task_num; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, and we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause could occur while
 * it is suspended.
 * This is actually safe, since entering the sync barrier is one of
 * the last things do_marking_step() does, and it doesn't manipulate
 * any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    reset_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
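  // The objects in a root region (a survivor region of the initial-mark
  // pause) are live by definition, so we walk the region from bottom()
  // to top() and mark everything those objects reference; this scan has
  // to complete before the next evacuation pause can start.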
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
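    // set_phase() above sized the terminator and the two overflow
    // barrier syncs for active_workers, so the gang is expected to run
    // with that same number of workers.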
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, and we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  // If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");
    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
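    // Instead we keep iterating so that every mismatch is logged and
    // accumulated into _failures, not just the first one.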
    return false;
  }
};


class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
              "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
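    // Everything allocated in [ntams, top) after marking started is
    // implicitly live, so its cards are set here without consulting the
    // mark bitmap.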
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     end_idx, _card_bm->size()));
      assert(start_idx < _card_bm->size(),
             err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     start_idx, _card_bm->size()));

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
1632 if (G1CollectedHeap::use_parallel_gc_threads()) { 1633 assert( _g1h->workers()->active_workers() > 0, 1634 "Should have been previously set"); 1635 _n_workers = _g1h->workers()->active_workers(); 1636 } else { 1637 _n_workers = 1; 1638 } 1639 } 1640 1641 void work(uint worker_id) { 1642 assert(worker_id < _n_workers, "invariant"); 1643 1644 FinalCountDataUpdateClosure final_update_cl(_g1h, 1645 _actual_region_bm, 1646 _actual_card_bm); 1647 1648 if (G1CollectedHeap::use_parallel_gc_threads()) { 1649 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1650 worker_id, 1651 _n_workers, 1652 HeapRegion::FinalCountClaimValue); 1653 } else { 1654 _g1h->heap_region_iterate(&final_update_cl); 1655 } 1656 } 1657 }; 1658 1659 class G1ParNoteEndTask; 1660 1661 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1662 G1CollectedHeap* _g1; 1663 int _worker_num; 1664 size_t _max_live_bytes; 1665 uint _regions_claimed; 1666 size_t _freed_bytes; 1667 FreeRegionList* _local_cleanup_list; 1668 OldRegionSet* _old_proxy_set; 1669 HumongousRegionSet* _humongous_proxy_set; 1670 HRRSCleanupTask* _hrrs_cleanup_task; 1671 double _claimed_region_time; 1672 double _max_region_time; 1673 1674 public: 1675 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1676 int worker_num, 1677 FreeRegionList* local_cleanup_list, 1678 OldRegionSet* old_proxy_set, 1679 HumongousRegionSet* humongous_proxy_set, 1680 HRRSCleanupTask* hrrs_cleanup_task) : 1681 _g1(g1), _worker_num(worker_num), 1682 _max_live_bytes(0), _regions_claimed(0), 1683 _freed_bytes(0), 1684 _claimed_region_time(0.0), _max_region_time(0.0), 1685 _local_cleanup_list(local_cleanup_list), 1686 _old_proxy_set(old_proxy_set), 1687 _humongous_proxy_set(humongous_proxy_set), 1688 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1689 1690 size_t freed_bytes() { return _freed_bytes; } 1691 1692 bool doHeapRegion(HeapRegion *hr) { 1693 if (hr->continuesHumongous()) { 1694 return false; 1695 } 1696 // We use a claim value of zero here because all regions 1697 // were claimed with value 1 in the FinalCount task. 
1698 _g1->reset_gc_time_stamps(hr); 1699 double start = os::elapsedTime(); 1700 _regions_claimed++; 1701 hr->note_end_of_marking(); 1702 _max_live_bytes += hr->max_live_bytes(); 1703 _g1->free_region_if_empty(hr, 1704 &_freed_bytes, 1705 _local_cleanup_list, 1706 _old_proxy_set, 1707 _humongous_proxy_set, 1708 _hrrs_cleanup_task, 1709 true /* par */); 1710 double region_time = (os::elapsedTime() - start); 1711 _claimed_region_time += region_time; 1712 if (region_time > _max_region_time) { 1713 _max_region_time = region_time; 1714 } 1715 return false; 1716 } 1717 1718 size_t max_live_bytes() { return _max_live_bytes; } 1719 uint regions_claimed() { return _regions_claimed; } 1720 double claimed_region_time_sec() { return _claimed_region_time; } 1721 double max_region_time_sec() { return _max_region_time; } 1722 }; 1723 1724 class G1ParNoteEndTask: public AbstractGangTask { 1725 friend class G1NoteEndOfConcMarkClosure; 1726 1727 protected: 1728 G1CollectedHeap* _g1h; 1729 size_t _max_live_bytes; 1730 size_t _freed_bytes; 1731 FreeRegionList* _cleanup_list; 1732 1733 public: 1734 G1ParNoteEndTask(G1CollectedHeap* g1h, 1735 FreeRegionList* cleanup_list) : 1736 AbstractGangTask("G1 note end"), _g1h(g1h), 1737 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1738 1739 void work(uint worker_id) { 1740 double start = os::elapsedTime(); 1741 FreeRegionList local_cleanup_list("Local Cleanup List"); 1742 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1743 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1744 HRRSCleanupTask hrrs_cleanup_task; 1745 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1746 &old_proxy_set, 1747 &humongous_proxy_set, 1748 &hrrs_cleanup_task); 1749 if (G1CollectedHeap::use_parallel_gc_threads()) { 1750 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1751 _g1h->workers()->active_workers(), 1752 HeapRegion::NoteEndClaimValue); 1753 } else { 1754 _g1h->heap_region_iterate(&g1_note_end); 1755 } 1756 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1757 1758 // Now update the lists 1759 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1760 NULL /* free_list */, 1761 &old_proxy_set, 1762 &humongous_proxy_set, 1763 true /* par */); 1764 { 1765 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1766 _max_live_bytes += g1_note_end.max_live_bytes(); 1767 _freed_bytes += g1_note_end.freed_bytes(); 1768 1769 // If we iterate over the global cleanup list at the end of 1770 // cleanup to do this printing we cannot guarantee that we will 1771 // only generate output for the newly-reclaimed regions (the list 1772 // might not be empty at the beginning of cleanup; we might 1773 // still be working on its previous contents). So we do the 1774 // printing here, before we append the new regions to the global 1775 // cleanup list.
1776 1777 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1778 if (hr_printer->is_active()) { 1779 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1780 while (iter.more_available()) { 1781 HeapRegion* hr = iter.get_next(); 1782 hr_printer->cleanup(hr); 1783 } 1784 } 1785 1786 _cleanup_list->add_as_tail(&local_cleanup_list); 1787 assert(local_cleanup_list.is_empty(), "post-condition"); 1788 1789 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1790 } 1791 } 1792 size_t max_live_bytes() { return _max_live_bytes; } 1793 size_t freed_bytes() { return _freed_bytes; } 1794 }; 1795 1796 class G1ParScrubRemSetTask: public AbstractGangTask { 1797 protected: 1798 G1RemSet* _g1rs; 1799 BitMap* _region_bm; 1800 BitMap* _card_bm; 1801 public: 1802 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1803 BitMap* region_bm, BitMap* card_bm) : 1804 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1805 _region_bm(region_bm), _card_bm(card_bm) { } 1806 1807 void work(uint worker_id) { 1808 if (G1CollectedHeap::use_parallel_gc_threads()) { 1809 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1810 HeapRegion::ScrubRemSetClaimValue); 1811 } else { 1812 _g1rs->scrub(_region_bm, _card_bm); 1813 } 1814 } 1815 1816 }; 1817 1818 void ConcurrentMark::cleanup() { 1819 // world is stopped at this checkpoint 1820 assert(SafepointSynchronize::is_at_safepoint(), 1821 "world should be stopped"); 1822 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1823 1824 // If a full collection has happened, we shouldn't do this. 1825 if (has_aborted()) { 1826 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1827 return; 1828 } 1829 1830 HRSPhaseSetter x(HRSPhaseCleanup); 1831 g1h->verify_region_sets_optional(); 1832 1833 if (VerifyDuringGC) { 1834 HandleMark hm; // handle scope 1835 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1836 Universe::heap()->prepare_for_verify(); 1837 Universe::verify(/* silent */ false, 1838 /* option */ VerifyOption_G1UsePrevMarking); 1839 } 1840 1841 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1842 g1p->record_concurrent_mark_cleanup_start(); 1843 1844 double start = os::elapsedTime(); 1845 1846 HeapRegionRemSet::reset_for_cleanup_tasks(); 1847 1848 uint n_workers; 1849 1850 // Do counting once more with the world stopped for good measure. 1851 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1852 1853 if (G1CollectedHeap::use_parallel_gc_threads()) { 1854 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1855 "sanity check"); 1856 1857 g1h->set_par_threads(); 1858 n_workers = g1h->n_par_threads(); 1859 assert(g1h->n_par_threads() == n_workers, 1860 "Should not have been reset"); 1861 g1h->workers()->run_task(&g1_par_count_task); 1862 // Done with the parallel phase so reset to 0. 1863 g1h->set_par_threads(0); 1864 1865 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1866 "sanity check"); 1867 } else { 1868 n_workers = 1; 1869 g1_par_count_task.work(0); 1870 } 1871 1872 if (VerifyDuringGC) { 1873 // Verify that the counting data accumulated during marking matches 1874 // that calculated by walking the marking bitmap. 
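// A minimal sketch of the check this task performs (illustrative only; the
// real comparison is done per-region by VerifyLiveObjectDataHRClosure, which
// also reports exactly which card or region differed). Roughly, a failure is
// a bit set in the expected bitmap but clear in the actual one:
//
//   int failures = 0;
//   for (BitMap::idx_t idx = 0; idx < expected_card_bm.size(); idx += 1) {
//     if (expected_card_bm.at(idx) && !_card_bm.at(idx)) {
//       failures += 1;  // a card shown live by the mark bitmap was missed
//     }
//   }
//   // ... and similarly for expected_region_bm against _region_bm.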
1875 1876 // Bitmaps to hold expected values 1877 BitMap expected_region_bm(_region_bm.size(), false); 1878 BitMap expected_card_bm(_card_bm.size(), false); 1879 1880 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1881 &_region_bm, 1882 &_card_bm, 1883 &expected_region_bm, 1884 &expected_card_bm); 1885 1886 if (G1CollectedHeap::use_parallel_gc_threads()) { 1887 g1h->set_par_threads((int)n_workers); 1888 g1h->workers()->run_task(&g1_par_verify_task); 1889 // Done with the parallel phase so reset to 0. 1890 g1h->set_par_threads(0); 1891 1892 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 1893 "sanity check"); 1894 } else { 1895 g1_par_verify_task.work(0); 1896 } 1897 1898 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1899 } 1900 1901 size_t start_used_bytes = g1h->used(); 1902 g1h->set_marking_complete(); 1903 1904 double count_end = os::elapsedTime(); 1905 double this_final_counting_time = (count_end - start); 1906 _total_counting_time += this_final_counting_time; 1907 1908 if (G1PrintRegionLivenessInfo) { 1909 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1910 _g1h->heap_region_iterate(&cl); 1911 } 1912 1913 // Install newly created mark bitmap as "prev". 1914 swapMarkBitMaps(); 1915 1916 g1h->reset_gc_time_stamp(); 1917 1918 // Note end of marking in all heap regions. 1919 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 1920 if (G1CollectedHeap::use_parallel_gc_threads()) { 1921 g1h->set_par_threads((int)n_workers); 1922 g1h->workers()->run_task(&g1_par_note_end_task); 1923 g1h->set_par_threads(0); 1924 1925 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 1926 "sanity check"); 1927 } else { 1928 g1_par_note_end_task.work(0); 1929 } 1930 g1h->check_gc_time_stamps(); 1931 1932 if (!cleanup_list_is_empty()) { 1933 // The cleanup list is not empty, so we'll have to process it 1934 // concurrently. Notify anyone else that might be wanting free 1935 // regions that there will be more free regions coming soon. 1936 g1h->set_free_regions_coming(); 1937 } 1938 1939 // We need to do this before the record_concurrent_mark_cleanup_end() call 1940 // below, since it affects the metric by which we sort the heap regions. 1941 if (G1ScrubRemSets) { 1942 double rs_scrub_start = os::elapsedTime(); 1943 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 1944 if (G1CollectedHeap::use_parallel_gc_threads()) { 1945 g1h->set_par_threads((int)n_workers); 1946 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1947 g1h->set_par_threads(0); 1948 1949 assert(g1h->check_heap_region_claim_values( 1950 HeapRegion::ScrubRemSetClaimValue), 1951 "sanity check"); 1952 } else { 1953 g1_par_scrub_rs_task.work(0); 1954 } 1955 1956 double rs_scrub_end = os::elapsedTime(); 1957 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1958 _total_rs_scrub_time += this_rs_scrub_time; 1959 } 1960 1961 // this will also free any regions totally full of garbage objects, 1962 // and sort the regions. 1963 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 1964 1965 // Statistics. 1966 double end = os::elapsedTime(); 1967 _cleanup_times.add((end - start) * 1000.0); 1968 1969 if (G1Log::fine()) { 1970 g1h->print_size_transition(gclog_or_tty, 1971 start_used_bytes, 1972 g1h->used(), 1973 g1h->capacity()); 1974 } 1975 1976 // Clean up will have freed any regions completely full of garbage. 1977 // Update the soft reference policy with the new heap occupancy.
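// Background illustration (an assumption about the default soft reference
// policies, not something this file defines): the LRU policies clear a
// SoftReference once it has been unused for longer than roughly
//
//   SoftRefLRUPolicyMSPerMB * free_heap_in_MB
//
// so the free-heap figure recorded below directly scales how long softly
// reachable objects survive after this cleanup.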
1978 Universe::update_heap_info_at_gc(); 1979 1980 // We need to make this be a "collection" so any collection pause that 1981 // races with it goes around and waits for completeCleanup to finish. 1982 g1h->increment_total_collections(); 1983 1984 // We reclaimed old regions so we should calculate the sizes to make 1985 // sure we update the old gen/space data. 1986 g1h->g1mm()->update_sizes(); 1987 1988 if (VerifyDuringGC) { 1989 HandleMark hm; // handle scope 1990 gclog_or_tty->print(" VerifyDuringGC:(after)"); 1991 Universe::heap()->prepare_for_verify(); 1992 Universe::verify(/* silent */ false, 1993 /* option */ VerifyOption_G1UsePrevMarking); 1994 } 1995 1996 g1h->verify_region_sets_optional(); 1997 g1h->trace_heap_after_concurrent_cycle(); 1998 } 1999 2000 void ConcurrentMark::completeCleanup() { 2001 if (has_aborted()) return; 2002 2003 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2004 2005 _cleanup_list.verify_optional(); 2006 FreeRegionList tmp_free_list("Tmp Free List"); 2007 2008 if (G1ConcRegionFreeingVerbose) { 2009 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2010 "cleanup list has %u entries", 2011 _cleanup_list.length()); 2012 } 2013 2014 // No one else should be accessing the _cleanup_list at this point, 2015 // so it's not necessary to take any locks 2016 while (!_cleanup_list.is_empty()) { 2017 HeapRegion* hr = _cleanup_list.remove_head(); 2018 assert(hr != NULL, "the list was not empty"); 2019 hr->par_clear(); 2020 tmp_free_list.add_as_tail(hr); 2021 2022 // Instead of adding one region at a time to the secondary_free_list, 2023 // we accumulate them in the local list and move them a few at a 2024 // time. This also cuts down on the number of notify_all() calls 2025 // we do during this process. We'll also append the local list when 2026 // _cleanup_list is empty (which means we just removed the last 2027 // region from the _cleanup_list). 2028 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2029 _cleanup_list.is_empty()) { 2030 if (G1ConcRegionFreeingVerbose) { 2031 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2032 "appending %u entries to the secondary_free_list, " 2033 "cleanup list still has %u entries", 2034 tmp_free_list.length(), 2035 _cleanup_list.length()); 2036 } 2037 2038 { 2039 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2040 g1h->secondary_free_list_add_as_tail(&tmp_free_list); 2041 SecondaryFreeList_lock->notify_all(); 2042 } 2043 2044 if (G1StressConcRegionFreeing) { 2045 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2046 os::sleep(Thread::current(), (jlong) 1, false); 2047 } 2048 } 2049 } 2050 } 2051 assert(tmp_free_list.is_empty(), "post-condition"); 2052 } 2053 2054 // Supporting Object and Oop closures for reference discovery 2055 // and processing during marking 2056 2057 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2058 HeapWord* addr = (HeapWord*)obj; 2059 return addr != NULL && 2060 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2061 } 2062 2063 // 'Keep Alive' oop closure used by both serial and parallel reference processing. 2064 // Uses the CMTask associated with a worker thread (for serial reference 2065 // processing the CMTask for worker 0 is used) to preserve (mark) and 2066 // trace referent objects. 2067 // 2068 // Using the CMTask and embedded local queues avoids having the worker 2069 // threads operating on the global mark stack.
This reduces the risk 2070 // of overflowing the stack - which we would rather avoid at this late 2071 // stage. Also, using the tasks' local queues removes the potential 2072 // for workers to interfere with each other, which could occur if 2073 // they operated on the global stack. 2074 2075 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2076 ConcurrentMark* _cm; 2077 CMTask* _task; 2078 int _ref_counter_limit; 2079 int _ref_counter; 2080 bool _is_serial; 2081 public: 2082 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2083 _cm(cm), _task(task), _is_serial(is_serial), 2084 _ref_counter_limit(G1RefProcDrainInterval) { 2085 assert(_ref_counter_limit > 0, "sanity"); 2086 assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code"); 2087 _ref_counter = _ref_counter_limit; 2088 } 2089 2090 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2091 virtual void do_oop( oop* p) { do_oop_work(p); } 2092 2093 template <class T> void do_oop_work(T* p) { 2094 if (!_cm->has_overflown()) { 2095 oop obj = oopDesc::load_decode_heap_oop(p); 2096 if (_cm->verbose_high()) { 2097 gclog_or_tty->print_cr("\t[%d] we're looking at location " 2098 "*"PTR_FORMAT" = "PTR_FORMAT, 2099 _task->task_id(), p, (void*) obj); 2100 } 2101 2102 _task->deal_with_reference(obj); 2103 _ref_counter--; 2104 2105 if (_ref_counter == 0) { 2106 // We have dealt with _ref_counter_limit references, pushing them 2107 // and objects reachable from them on to the local stack (and 2108 // possibly the global stack). Call CMTask::do_marking_step() to 2109 // process these entries. 2110 // 2111 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2112 // there's nothing more to do (i.e. we're done with the entries that 2113 // were pushed as a result of the CMTask::deal_with_reference() calls 2114 // above) or we overflow. 2115 // 2116 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2117 // flag while there may still be some work to do. (See the comment at 2118 // the beginning of CMTask::do_marking_step() for those conditions - 2119 // one of which is reaching the specified time target.) It is only 2120 // when CMTask::do_marking_step() returns without setting the 2121 // has_aborted() flag that the marking step has completed. 2122 do { 2123 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2124 _task->do_marking_step(mark_step_duration_ms, 2125 false /* do_termination */, 2126 _is_serial); 2127 } while (_task->has_aborted() && !_cm->has_overflown()); 2128 _ref_counter = _ref_counter_limit; 2129 } 2130 } else { 2131 if (_cm->verbose_high()) { 2132 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id()); 2133 } 2134 } 2135 } 2136 }; 2137 2138 // 'Drain' oop closure used by both serial and parallel reference processing. 2139 // Uses the CMTask associated with a given worker thread (for serial 2140 // reference processing the CMTask for worker 0 is used). Calls the 2141 // do_marking_step routine, with an unbelievably large timeout value, 2142 // to drain the marking data structures of the remaining entries 2143 // added by the 'keep alive' oop closure above.
2144 2145 class G1CMDrainMarkingStackClosure: public VoidClosure { 2146 ConcurrentMark* _cm; 2147 CMTask* _task; 2148 bool _is_serial; 2149 public: 2150 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2151 _cm(cm), _task(task), _is_serial(is_serial) { 2152 assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code"); 2153 } 2154 2155 void do_void() { 2156 do { 2157 if (_cm->verbose_high()) { 2158 gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step - serial: %s", 2159 _task->task_id(), BOOL_TO_STR(_is_serial)); 2160 } 2161 2162 // We call CMTask::do_marking_step() to completely drain the local 2163 // and global marking stacks of entries pushed by the 'keep alive' 2164 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2165 // 2166 // CMTask::do_marking_step() is called in a loop, which we'll exit 2167 // if there's nothing more to do (i.e. we've completely drained the 2168 // entries that were pushed as a result of applying the 'keep alive' 2169 // closure to the entries on the discovered ref lists) or we overflow 2170 // the global marking stack. 2171 // 2172 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2173 // flag while there may still be some work to do. (See the comment at 2174 // the beginning of CMTask::do_marking_step() for those conditions - 2175 // one of which is reaching the specified time target.) It is only 2176 // when CMTask::do_marking_step() returns without setting the 2177 // has_aborted() flag that the marking step has completed. 2178 2179 _task->do_marking_step(1000000000.0 /* something very large */, 2180 true /* do_termination */, 2181 _is_serial); 2182 } while (_task->has_aborted() && !_cm->has_overflown()); 2183 } 2184 }; 2185 2186 // Implementation of AbstractRefProcTaskExecutor for parallel 2187 // reference processing at the end of G1 concurrent marking 2188 2189 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2190 private: 2191 G1CollectedHeap* _g1h; 2192 ConcurrentMark* _cm; 2193 WorkGang* _workers; 2194 int _active_workers; 2195 2196 public: 2197 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2198 ConcurrentMark* cm, 2199 WorkGang* workers, 2200 int n_workers) : 2201 _g1h(g1h), _cm(cm), 2202 _workers(workers), _active_workers(n_workers) { } 2203 2204 // Executes the given task using concurrent marking worker threads.
2205 virtual void execute(ProcessTask& task); 2206 virtual void execute(EnqueueTask& task); 2207 }; 2208 2209 class G1CMRefProcTaskProxy: public AbstractGangTask { 2210 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2211 ProcessTask& _proc_task; 2212 G1CollectedHeap* _g1h; 2213 ConcurrentMark* _cm; 2214 2215 public: 2216 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2217 G1CollectedHeap* g1h, 2218 ConcurrentMark* cm) : 2219 AbstractGangTask("Process reference objects in parallel"), 2220 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2221 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2222 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2223 } 2224 2225 virtual void work(uint worker_id) { 2226 CMTask* task = _cm->task(worker_id); 2227 G1CMIsAliveClosure g1_is_alive(_g1h); 2228 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2229 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2230 2231 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2232 } 2233 }; 2234 2235 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2236 assert(_workers != NULL, "Need parallel worker threads."); 2237 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2238 2239 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2240 2241 // We need to reset the phase for each task execution so that 2242 // the termination protocol of CMTask::do_marking_step works. 2243 _cm->set_phase(_active_workers, false /* concurrent */); 2244 _g1h->set_par_threads(_active_workers); 2245 _workers->run_task(&proc_task_proxy); 2246 _g1h->set_par_threads(0); 2247 } 2248 2249 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2250 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2251 EnqueueTask& _enq_task; 2252 2253 public: 2254 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2255 AbstractGangTask("Enqueue reference objects in parallel"), 2256 _enq_task(enq_task) { } 2257 2258 virtual void work(uint worker_id) { 2259 _enq_task.work(worker_id); 2260 } 2261 }; 2262 2263 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2264 assert(_workers != NULL, "Need parallel worker threads."); 2265 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2266 2267 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2268 2269 _g1h->set_par_threads(_active_workers); 2270 _workers->run_task(&enq_task_proxy); 2271 _g1h->set_par_threads(0); 2272 } 2273 2274 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2275 ResourceMark rm; 2276 HandleMark hm; 2277 2278 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2279 2280 // Is alive closure. 2281 G1CMIsAliveClosure g1_is_alive(g1h); 2282 2283 // Inner scope to exclude the cleaning of the string and symbol 2284 // tables from the displayed time. 2285 { 2286 if (G1Log::finer()) { 2287 gclog_or_tty->put(' '); 2288 } 2289 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm()); 2290 2291 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2292 2293 // See the comment in G1CollectedHeap::ref_processing_init() 2294 // about how reference processing currently works in G1. 2295 2296 // Set the soft reference policy 2297 rp->setup_policy(clear_all_soft_refs); 2298 assert(_markStack.isEmpty(), "mark stack should be empty"); 2299 2300 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2301 // in serial reference processing. 
Note these closures are also 2302 // used for serially processing (by the current thread) the 2303 // JNI references during parallel reference processing. 2304 // 2305 // These closures do not need to synchronize with the worker 2306 // threads involved in parallel reference processing as these 2307 // instances are executed serially by the current thread (e.g. 2308 // reference processing is not multi-threaded and is thus 2309 // performed by the current thread instead of a gang worker). 2310 // 2311 // The gang tasks involved in parallel reference processing create 2312 // their own instances of these closures, which do their own 2313 // synchronization among themselves. 2314 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2315 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2316 2317 // We need at least one active thread. If reference processing 2318 // is not multi-threaded we use the current (VMThread) thread, 2319 // otherwise we use the work gang from the G1CollectedHeap and 2320 // we utilize all the worker threads we can. 2321 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL; 2322 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2323 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U); 2324 2325 // Parallel processing task executor. 2326 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2327 g1h->workers(), active_workers); 2328 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2329 2330 ReferenceProcessorStats stats; 2331 2332 // Set the degree of MT processing here. If the discovery was done MT, 2333 // the number of threads involved during discovery could differ from 2334 // the number of active workers. This is OK as long as the discovered 2335 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2336 rp->set_active_mt_degree(active_workers); 2337 2338 // Process the weak references. 2339 stats = rp->process_discovered_references(&g1_is_alive, 2340 &g1_keep_alive, 2341 &g1_drain_mark_stack, 2342 executor, 2343 g1h->gc_timer_cm()); 2344 2345 // The do_oop work routines of the keep_alive and drain_marking_stack 2346 // oop closures will set the has_overflown flag if we overflow the 2347 // global marking stack. 2348 2349 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2350 2351 assert(_markStack.overflow() || _markStack.isEmpty(), 2352 "mark stack should be empty (unless it overflowed)"); 2353 2354 if (_markStack.overflow()) { 2355 // This should have been done already when we tried to push an 2356 // entry on to the global mark stack. But let's do it again. 2357 set_has_overflown(); 2358 } 2359 2360 assert(rp->num_q() == active_workers, "why not"); 2361 2362 rp->enqueue_discovered_references(executor); 2363 2364 rp->verify_no_references_recorded(); 2365 assert(!rp->discovery_enabled(), "Post condition"); 2366 } 2367 2368 // Now clean up stale oops in StringTable 2369 StringTable::unlink(&g1_is_alive); 2370 // Clean up unreferenced symbols in symbol table.
2371 SymbolTable::unlink(); 2372 } 2373 2374 void ConcurrentMark::swapMarkBitMaps() { 2375 CMBitMapRO* temp = _prevMarkBitMap; 2376 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2377 _nextMarkBitMap = (CMBitMap*) temp; 2378 } 2379 2380 class CMRemarkTask: public AbstractGangTask { 2381 private: 2382 ConcurrentMark* _cm; 2383 bool _is_serial; 2384 public: 2385 void work(uint worker_id) { 2386 // Since all available tasks are actually started, we should 2387 // only proceed if we're supposed to be active. 2388 if (worker_id < _cm->active_tasks()) { 2389 CMTask* task = _cm->task(worker_id); 2390 task->record_start_time(); 2391 do { 2392 task->do_marking_step(1000000000.0 /* something very large */, 2393 true /* do_termination */, 2394 _is_serial); 2395 } while (task->has_aborted() && !_cm->has_overflown()); 2396 // If we overflow, then we do not want to restart. We instead 2397 // want to abort remark and do concurrent marking again. 2398 task->record_end_time(); 2399 } 2400 } 2401 2402 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) : 2403 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) { 2404 _cm->terminator()->reset_for_reuse(active_workers); 2405 } 2406 }; 2407 2408 void ConcurrentMark::checkpointRootsFinalWork() { 2409 ResourceMark rm; 2410 HandleMark hm; 2411 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2412 2413 g1h->ensure_parsability(false); 2414 2415 if (G1CollectedHeap::use_parallel_gc_threads()) { 2416 G1CollectedHeap::StrongRootsScope srs(g1h); 2417 // this is remark, so we'll use up all active threads 2418 uint active_workers = g1h->workers()->active_workers(); 2419 if (active_workers == 0) { 2420 assert(active_workers > 0, "Should have been set earlier"); 2421 active_workers = (uint) ParallelGCThreads; 2422 g1h->workers()->set_active_workers(active_workers); 2423 } 2424 set_phase(active_workers, false /* concurrent */); 2425 // Leave _parallel_marking_threads at its 2426 // value originally calculated in the ConcurrentMark 2427 // constructor and pass values of the active workers 2428 // through the gang in the task. 2429 2430 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */); 2431 // We will start all available threads, even if we decide that the 2432 // active_workers will be fewer. The extra ones will just bail out 2433 // immediately. 2434 g1h->set_par_threads(active_workers); 2435 g1h->workers()->run_task(&remarkTask); 2436 g1h->set_par_threads(0); 2437 } else { 2438 G1CollectedHeap::StrongRootsScope srs(g1h); 2439 uint active_workers = 1; 2440 set_phase(active_workers, false /* concurrent */); 2441 2442 // Note - if there's no work gang then the VMThread will be 2443 // the thread to execute the remark - serially. We have 2444 // to pass true for the is_serial parameter so that 2445 // CMTask::do_marking_step() doesn't enter the sync 2446 // barriers in the event of an overflow. Doing so would 2447 // trip an assert, since the current thread is not a 2448 // concurrent GC thread.
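// Roughly the guard being avoided (an illustrative sketch of the logic
// inside CMTask::do_marking_step(), not a verbatim quote): when marking
// overflows, the parallel path synchronizes all workers through barriers
// whose code expects to run on a ConcurrentGCThread:
//
//   if (!is_serial) {
//     _cm->enter_first_sync_barrier(_task_id);  // checks the thread type
//     ...
//   }
//
// With is_serial == true the barrier is skipped, which is safe here
// because the VMThread is the only thread executing the remark.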
2449 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2450 remarkTask.work(0); 2451 } 2452 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2453 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2454 2455 print_stats(); 2456 2457 #if VERIFY_OBJS_PROCESSED 2458 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2459 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2460 _scan_obj_cl.objs_processed, 2461 ThreadLocalObjQueue::objs_enqueued); 2462 guarantee(_scan_obj_cl.objs_processed == 2463 ThreadLocalObjQueue::objs_enqueued, 2464 "Different number of objs processed and enqueued."); 2465 } 2466 #endif 2467 } 2468 2469 #ifndef PRODUCT 2470 2471 class PrintReachableOopClosure: public OopClosure { 2472 private: 2473 G1CollectedHeap* _g1h; 2474 outputStream* _out; 2475 VerifyOption _vo; 2476 bool _all; 2477 2478 public: 2479 PrintReachableOopClosure(outputStream* out, 2480 VerifyOption vo, 2481 bool all) : 2482 _g1h(G1CollectedHeap::heap()), 2483 _out(out), _vo(vo), _all(all) { } 2484 2485 void do_oop(narrowOop* p) { do_oop_work(p); } 2486 void do_oop( oop* p) { do_oop_work(p); } 2487 2488 template <class T> void do_oop_work(T* p) { 2489 oop obj = oopDesc::load_decode_heap_oop(p); 2490 const char* str = NULL; 2491 const char* str2 = ""; 2492 2493 if (obj == NULL) { 2494 str = ""; 2495 } else if (!_g1h->is_in_g1_reserved(obj)) { 2496 str = " O"; 2497 } else { 2498 HeapRegion* hr = _g1h->heap_region_containing(obj); 2499 guarantee(hr != NULL, "invariant"); 2500 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2501 bool marked = _g1h->is_marked(obj, _vo); 2502 2503 if (over_tams) { 2504 str = " >"; 2505 if (marked) { 2506 str2 = " AND MARKED"; 2507 } 2508 } else if (marked) { 2509 str = " M"; 2510 } else { 2511 str = " NOT"; 2512 } 2513 } 2514 2515 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2516 p, (void*) obj, str, str2); 2517 } 2518 }; 2519 2520 class PrintReachableObjectClosure : public ObjectClosure { 2521 private: 2522 G1CollectedHeap* _g1h; 2523 outputStream* _out; 2524 VerifyOption _vo; 2525 bool _all; 2526 HeapRegion* _hr; 2527 2528 public: 2529 PrintReachableObjectClosure(outputStream* out, 2530 VerifyOption vo, 2531 bool all, 2532 HeapRegion* hr) : 2533 _g1h(G1CollectedHeap::heap()), 2534 _out(out), _vo(vo), _all(all), _hr(hr) { } 2535 2536 void do_object(oop o) { 2537 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2538 bool marked = _g1h->is_marked(o, _vo); 2539 bool print_it = _all || over_tams || marked; 2540 2541 if (print_it) { 2542 _out->print_cr(" "PTR_FORMAT"%s", 2543 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2544 PrintReachableOopClosure oopCl(_out, _vo, _all); 2545 o->oop_iterate(&oopCl); 2546 } 2547 } 2548 }; 2549 2550 class PrintReachableRegionClosure : public HeapRegionClosure { 2551 private: 2552 G1CollectedHeap* _g1h; 2553 outputStream* _out; 2554 VerifyOption _vo; 2555 bool _all; 2556 2557 public: 2558 bool doHeapRegion(HeapRegion* hr) { 2559 HeapWord* b = hr->bottom(); 2560 HeapWord* e = hr->end(); 2561 HeapWord* t = hr->top(); 2562 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2563 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2564 "TAMS: "PTR_FORMAT, b, e, t, p); 2565 _out->cr(); 2566 2567 HeapWord* from = b; 2568 HeapWord* to = t; 2569 2570 if (to > from) { 2571 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2572 _out->cr(); 2573 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2574 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2575 _out->cr(); 2576 } 2577 2578 return false; 2579 } 2580 2581 PrintReachableRegionClosure(outputStream* out, 2582 VerifyOption vo, 2583 bool all) : 2584 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2585 }; 2586 2587 void ConcurrentMark::print_reachable(const char* str, 2588 VerifyOption vo, 2589 bool all) { 2590 gclog_or_tty->cr(); 2591 gclog_or_tty->print_cr("== Doing heap dump... "); 2592 2593 if (G1PrintReachableBaseFile == NULL) { 2594 gclog_or_tty->print_cr(" #### error: no base file defined"); 2595 return; 2596 } 2597 2598 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2599 (JVM_MAXPATHLEN - 1)) { 2600 gclog_or_tty->print_cr(" #### error: file name too long"); 2601 return; 2602 } 2603 2604 char file_name[JVM_MAXPATHLEN]; 2605 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2606 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2607 2608 fileStream fout(file_name); 2609 if (!fout.is_open()) { 2610 gclog_or_tty->print_cr(" #### error: could not open file"); 2611 return; 2612 } 2613 2614 outputStream* out = &fout; 2615 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2616 out->cr(); 2617 2618 out->print_cr("--- ITERATING OVER REGIONS"); 2619 out->cr(); 2620 PrintReachableRegionClosure rcl(out, vo, all); 2621 _g1h->heap_region_iterate(&rcl); 2622 out->cr(); 2623 2624 gclog_or_tty->print_cr(" done"); 2625 gclog_or_tty->flush(); 2626 } 2627 2628 #endif // PRODUCT 2629 2630 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2631 // Note we are overriding the read-only view of the prev map here, via 2632 // the cast. 2633 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2634 } 2635 2636 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2637 _nextMarkBitMap->clearRange(mr); 2638 } 2639 2640 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2641 clearRangePrevBitmap(mr); 2642 clearRangeNextBitmap(mr); 2643 } 2644 2645 HeapRegion* 2646 ConcurrentMark::claim_region(int task_num) { 2647 // "checkpoint" the finger 2648 HeapWord* finger = _finger; 2649 2650 // _heap_end will not change underneath our feet; it only changes at 2651 // yield points. 2652 while (finger < _heap_end) { 2653 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2654 2655 // Note on how this code handles humongous regions. In the 2656 // normal case the finger will reach the start of a "starts 2657 // humongous" (SH) region. Its end will either be the end of the 2658 // last "continues humongous" (CH) region in the sequence, or the 2659 // standard end of the SH region (if the SH is the only region in 2660 // the sequence). 
That way claim_region() will skip over the CH 2661 // regions. However, there is a subtle race between a CM thread 2662 // executing this method and a mutator thread doing a humongous 2663 // object allocation. The two are not mutually exclusive as the CM 2664 // thread does not need to hold the Heap_lock when it gets 2665 // here. So there is a chance that claim_region() will come across 2666 // a free region that's in the process of becoming a SH or a CH 2667 // region. In the former case, it will either 2668 // a) Miss the update to the region's end, in which case it will 2669 // visit every subsequent CH region, will find their bitmaps 2670 // empty, and do nothing, or 2671 // b) Will observe the update of the region's end (in which case 2672 // it will skip the subsequent CH regions). 2673 // If it comes across a region that suddenly becomes CH, the 2674 // scenario will be similar to b). So, the race between 2675 // claim_region() and a humongous object allocation might force us 2676 // to do a bit of unnecessary work (due to some unnecessary bitmap 2677 // iterations) but it should not introduce any correctness issues. 2678 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2679 HeapWord* bottom = curr_region->bottom(); 2680 HeapWord* end = curr_region->end(); 2681 HeapWord* limit = curr_region->next_top_at_mark_start(); 2682 2683 if (verbose_low()) { 2684 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " 2685 "["PTR_FORMAT", "PTR_FORMAT"), " 2686 "limit = "PTR_FORMAT, 2687 task_num, curr_region, bottom, end, limit); 2688 } 2689 2690 // Is the gap between reading the finger and doing the CAS too long? 2691 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2692 if (res == finger) { 2693 // we succeeded 2694 2695 // notice that _finger == end cannot be guaranteed here since 2696 // someone else might have moved the finger even further 2697 assert(_finger >= end, "the finger should have moved forward"); 2698 2699 if (verbose_low()) { 2700 gclog_or_tty->print_cr("[%d] we were successful with region = " 2701 PTR_FORMAT, task_num, curr_region); 2702 } 2703 2704 if (limit > bottom) { 2705 if (verbose_low()) { 2706 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " 2707 "returning it ", task_num, curr_region); 2708 } 2709 return curr_region; 2710 } else { 2711 assert(limit == bottom, 2712 "the region limit should be at bottom"); 2713 if (verbose_low()) { 2714 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " 2715 "returning NULL", task_num, curr_region); 2716 } 2717 // we return NULL and the caller should try calling 2718 // claim_region() again.
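// Illustrative caller pattern (a sketch only; the real retry loop lives
// in CMTask::do_marking_step() and also re-checks the abort and
// out-of-regions conditions on every iteration):
//
//   HeapRegion* claimed = _cm->claim_region(_task_id);
//   while (claimed == NULL && !_cm->out_of_regions() && !has_aborted()) {
//     claimed = _cm->claim_region(_task_id);  // lost the CAS, or region was empty
//   }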
2719 return NULL; 2720 } 2721 } else { 2722 assert(_finger > finger, "the finger should have moved forward"); 2723 if (verbose_low()) { 2724 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2725 "global finger = "PTR_FORMAT", " 2726 "our finger = "PTR_FORMAT, 2727 task_num, _finger, finger); 2728 } 2729 2730 // read it again 2731 finger = _finger; 2732 } 2733 } 2734 2735 return NULL; 2736 } 2737 2738 #ifndef PRODUCT 2739 enum VerifyNoCSetOopsPhase { 2740 VerifyNoCSetOopsStack, 2741 VerifyNoCSetOopsQueues, 2742 VerifyNoCSetOopsSATBCompleted, 2743 VerifyNoCSetOopsSATBThread 2744 }; 2745 2746 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2747 private: 2748 G1CollectedHeap* _g1h; 2749 VerifyNoCSetOopsPhase _phase; 2750 int _info; 2751 2752 const char* phase_str() { 2753 switch (_phase) { 2754 case VerifyNoCSetOopsStack: return "Stack"; 2755 case VerifyNoCSetOopsQueues: return "Queue"; 2756 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2757 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2758 default: ShouldNotReachHere(); 2759 } 2760 return NULL; 2761 } 2762 2763 void do_object_work(oop obj) { 2764 guarantee(!_g1h->obj_in_cs(obj), 2765 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2766 (void*) obj, phase_str(), _info)); 2767 } 2768 2769 public: 2770 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2771 2772 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2773 _phase = phase; 2774 _info = info; 2775 } 2776 2777 virtual void do_oop(oop* p) { 2778 oop obj = oopDesc::load_decode_heap_oop(p); 2779 do_object_work(obj); 2780 } 2781 2782 virtual void do_oop(narrowOop* p) { 2783 // We should not come across narrow oops while scanning marking 2784 // stacks and SATB buffers. 2785 ShouldNotReachHere(); 2786 } 2787 2788 virtual void do_object(oop obj) { 2789 do_object_work(obj); 2790 } 2791 }; 2792 2793 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2794 bool verify_enqueued_buffers, 2795 bool verify_thread_buffers, 2796 bool verify_fingers) { 2797 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2798 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2799 return; 2800 } 2801 2802 VerifyNoCSetOopsClosure cl; 2803 2804 if (verify_stacks) { 2805 // Verify entries on the global mark stack 2806 cl.set_phase(VerifyNoCSetOopsStack); 2807 _markStack.oops_do(&cl); 2808 2809 // Verify entries on the task queues 2810 for (int i = 0; i < (int) _max_task_num; i += 1) { 2811 cl.set_phase(VerifyNoCSetOopsQueues, i); 2812 OopTaskQueue* queue = _task_queues->queue(i); 2813 queue->oops_do(&cl); 2814 } 2815 } 2816 2817 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2818 2819 // Verify entries on the enqueued SATB buffers 2820 if (verify_enqueued_buffers) { 2821 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2822 satb_qs.iterate_completed_buffers_read_only(&cl); 2823 } 2824 2825 // Verify entries on the per-thread SATB buffers 2826 if (verify_thread_buffers) { 2827 cl.set_phase(VerifyNoCSetOopsSATBThread); 2828 satb_qs.iterate_thread_buffers_read_only(&cl); 2829 } 2830 2831 if (verify_fingers) { 2832 // Verify the global finger 2833 HeapWord* global_finger = finger(); 2834 if (global_finger != NULL && global_finger < _heap_end) { 2835 // The global finger always points to a heap region boundary. 
We 2836 // use heap_region_containing_raw() to get the containing region 2837 // given that the global finger could be pointing to a free region 2838 // which subsequently becomes continues humongous. If that 2839 // happens, heap_region_containing() will return the bottom of the 2840 // corresponding starts humongous region and the check below will 2841 // not hold any more. 2842 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2843 guarantee(global_finger == global_hr->bottom(), 2844 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2845 global_finger, HR_FORMAT_PARAMS(global_hr))); 2846 } 2847 2848 // Verify the task fingers 2849 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2850 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2851 CMTask* task = _tasks[i]; 2852 HeapWord* task_finger = task->finger(); 2853 if (task_finger != NULL && task_finger < _heap_end) { 2854 // See above note on the global finger verification. 2855 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2856 guarantee(task_finger == task_hr->bottom() || 2857 !task_hr->in_collection_set(), 2858 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2859 task_finger, HR_FORMAT_PARAMS(task_hr))); 2860 } 2861 } 2862 } 2863 } 2864 #endif // PRODUCT 2865 2866 // Aggregate the counting data that was constructed concurrently 2867 // with marking. 2868 class AggregateCountDataHRClosure: public HeapRegionClosure { 2869 G1CollectedHeap* _g1h; 2870 ConcurrentMark* _cm; 2871 CardTableModRefBS* _ct_bs; 2872 BitMap* _cm_card_bm; 2873 size_t _max_task_num; 2874 2875 public: 2876 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2877 BitMap* cm_card_bm, 2878 size_t max_task_num) : 2879 _g1h(g1h), _cm(g1h->concurrent_mark()), 2880 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 2881 _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { } 2882 2883 bool doHeapRegion(HeapRegion* hr) { 2884 if (hr->continuesHumongous()) { 2885 // We will ignore these here and process them when their 2886 // associated "starts humongous" region is processed. 2887 // Note that we cannot rely on their associated 2888 // "starts humongous" region to have their bit set to 1 2889 // since, due to the region chunking in the parallel region 2890 // iteration, a "continues humongous" region might be visited 2891 // before its associated "starts humongous". 2892 return false; 2893 } 2894 2895 HeapWord* start = hr->bottom(); 2896 HeapWord* limit = hr->next_top_at_mark_start(); 2897 HeapWord* end = hr->end(); 2898 2899 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2900 err_msg("Preconditions not met - " 2901 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2902 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2903 start, limit, hr->top(), hr->end())); 2904 2905 assert(hr->next_marked_bytes() == 0, "Precondition"); 2906 2907 if (start == limit) { 2908 // NTAMS of this region has not been set so nothing to do. 2909 return false; 2910 } 2911 2912 // 'start' should be in the heap. 
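// Worked example of the index arithmetic below (illustrative; assumes the
// usual 512-byte cards, i.e. CardTableModRefBS::card_shift == 9, with byte
// offsets measured from the bottom of the reserved heap and scaled down
// far below real region sizes):
//
//   start (bottom) at offset 0x0000 -> start_idx = 0x0000 >> 9 = 0
//   limit (ntams)  at offset 0x0533 -> limit_idx = 0x0533 >> 9 = 2
//   end            at offset 0x0800 -> end_idx   = 0x0800 >> 9 = 4
//
// limit is not card aligned, so limit_idx is bumped to 3 below; the
// half-open range [start_idx, limit_idx) then includes card 2, which
// holds the tail of the last object below ntams.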
2913 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 2914 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 2915 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 2916 2917 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 2918 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 2919 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 2920 2921 // If ntams is not card aligned then we bump card bitmap index 2922 // for limit so that we get all the cards spanned by 2923 // the object ending at ntams. 2924 // Note: if this is the last region in the heap then ntams 2925 // could be actually just beyond the end of the heap; 2926 // limit_idx will then correspond to a (non-existent) card 2927 // that is also outside the heap. 2928 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 2929 limit_idx += 1; 2930 } 2931 2932 assert(limit_idx <= end_idx, "or else use atomics"); 2933 2934 // Aggregate the "stripe" in the count data associated with hr. 2935 uint hrs_index = hr->hrs_index(); 2936 size_t marked_bytes = 0; 2937 2938 for (int i = 0; (size_t)i < _max_task_num; i += 1) { 2939 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2940 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2941 2942 // Fetch the marked_bytes in this region for task i and 2943 // add it to the running total for this region. 2944 marked_bytes += marked_bytes_array[hrs_index]; 2945 2946 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx) 2947 // into the global card bitmap. 2948 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2949 2950 while (scan_idx < limit_idx) { 2951 assert(task_card_bm->at(scan_idx) == true, "should be"); 2952 _cm_card_bm->set_bit(scan_idx); 2953 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2954 2955 // BitMap::get_next_one_offset() can handle the case when 2956 // its left_offset parameter is greater than its right_offset 2957 // parameter. It does, however, have an early exit if 2958 // left_offset == right_offset. So let's limit the value 2959 // passed in for left offset here. 2960 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2961 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2962 } 2963 } 2964 2965 // Update the marked bytes for this region.
2966 hr->add_to_marked_bytes(marked_bytes); 2967 2968 // Next heap region 2969 return false; 2970 } 2971 }; 2972 2973 class G1AggregateCountDataTask: public AbstractGangTask { 2974 protected: 2975 G1CollectedHeap* _g1h; 2976 ConcurrentMark* _cm; 2977 BitMap* _cm_card_bm; 2978 size_t _max_task_num; 2979 int _active_workers; 2980 2981 public: 2982 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2983 ConcurrentMark* cm, 2984 BitMap* cm_card_bm, 2985 size_t max_task_num, 2986 int n_workers) : 2987 AbstractGangTask("Count Aggregation"), 2988 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2989 _max_task_num(max_task_num), 2990 _active_workers(n_workers) { } 2991 2992 void work(uint worker_id) { 2993 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num); 2994 2995 if (G1CollectedHeap::use_parallel_gc_threads()) { 2996 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 2997 _active_workers, 2998 HeapRegion::AggregateCountClaimValue); 2999 } else { 3000 _g1h->heap_region_iterate(&cl); 3001 } 3002 } 3003 }; 3004 3005 3006 void ConcurrentMark::aggregate_count_data() { 3007 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3008 _g1h->workers()->active_workers() : 3009 1); 3010 3011 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3012 _max_task_num, n_workers); 3013 3014 if (G1CollectedHeap::use_parallel_gc_threads()) { 3015 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3016 "sanity check"); 3017 _g1h->set_par_threads(n_workers); 3018 _g1h->workers()->run_task(&g1_par_agg_task); 3019 _g1h->set_par_threads(0); 3020 3021 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3022 "sanity check"); 3023 _g1h->reset_heap_region_claim_values(); 3024 } else { 3025 g1_par_agg_task.work(0); 3026 } 3027 } 3028 3029 // Clear the per-worker arrays used to store the per-region counting data 3030 void ConcurrentMark::clear_all_count_data() { 3031 // Clear the global card bitmap - it will be filled during 3032 // liveness count aggregation (during remark) and the 3033 // final counting task. 3034 _card_bm.clear(); 3035 3036 // Clear the global region bitmap - it will be filled as part 3037 // of the final counting task. 
3038 _region_bm.clear(); 3039 3040 uint max_regions = _g1h->max_regions(); 3041 assert(_max_task_num != 0, "uninitialized"); 3042 3043 for (int i = 0; (size_t) i < _max_task_num; i += 1) { 3044 BitMap* task_card_bm = count_card_bitmap_for(i); 3045 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3046 3047 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3048 assert(marked_bytes_array != NULL, "uninitialized"); 3049 3050 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3051 task_card_bm->clear(); 3052 } 3053 } 3054 3055 void ConcurrentMark::print_stats() { 3056 if (verbose_stats()) { 3057 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3058 for (size_t i = 0; i < _active_tasks; ++i) { 3059 _tasks[i]->print_stats(); 3060 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3061 } 3062 } 3063 } 3064 3065 // abandon current marking iteration due to a Full GC 3066 void ConcurrentMark::abort() { 3067 // Clear all marks to force marking thread to do nothing 3068 _nextMarkBitMap->clearAll(); 3069 // Clear the liveness counting data 3070 clear_all_count_data(); 3071 // Empty mark stack 3072 reset_marking_state(); 3073 for (int i = 0; i < (int)_max_task_num; ++i) { 3074 _tasks[i]->clear_region_fields(); 3075 } 3076 _has_aborted = true; 3077 3078 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3079 satb_mq_set.abandon_partial_marking(); 3080 // This can be called either during or outside marking; we'll read 3081 // the expected_active value from the SATB queue set. 3082 satb_mq_set.set_active_all_threads( 3083 false, /* new active value */ 3084 satb_mq_set.is_active() /* expected_active */); 3085 3086 _g1h->trace_heap_after_concurrent_cycle(); 3087 _g1h->register_concurrent_cycle_end(); 3088 } 3089 3090 static void print_ms_time_info(const char* prefix, const char* name, 3091 NumberSeq& ns) { 3092 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3093 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3094 if (ns.num() > 0) { 3095 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3096 prefix, ns.sd(), ns.maximum()); 3097 } 3098 } 3099 3100 void ConcurrentMark::print_summary_info() { 3101 gclog_or_tty->print_cr(" Concurrent marking:"); 3102 print_ms_time_info(" ", "init marks", _init_times); 3103 print_ms_time_info(" ", "remarks", _remark_times); 3104 { 3105 print_ms_time_info(" ", "final marks", _remark_mark_times); 3106 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3107 3108 } 3109 print_ms_time_info(" ", "cleanups", _cleanup_times); 3110 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3111 _total_counting_time, 3112 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3113 (double)_cleanup_times.num() 3114 : 0.0)); 3115 if (G1ScrubRemSets) { 3116 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3117 _total_rs_scrub_time, 3118 (_cleanup_times.num() > 0 ?
_total_rs_scrub_time * 1000.0 / 3119 (double)_cleanup_times.num() 3120 : 0.0)); 3121 } 3122 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3123 (_init_times.sum() + _remark_times.sum() + 3124 _cleanup_times.sum())/1000.0); 3125 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3126 "(%8.2f s marking).", 3127 cmThread()->vtime_accum(), 3128 cmThread()->vtime_mark_accum()); 3129 } 3130 3131 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3132 if (use_parallel_marking_threads()) { 3133 _parallel_workers->print_worker_threads_on(st); 3134 } 3135 } 3136 3137 // We take a break if someone is trying to stop the world. 3138 bool ConcurrentMark::do_yield_check(uint worker_id) { 3139 if (should_yield()) { 3140 if (worker_id == 0) { 3141 _g1h->g1_policy()->record_concurrent_pause(); 3142 } 3143 cmThread()->yield(); 3144 return true; 3145 } else { 3146 return false; 3147 } 3148 } 3149 3150 bool ConcurrentMark::should_yield() { 3151 return cmThread()->should_yield(); 3152 } 3153 3154 bool ConcurrentMark::containing_card_is_marked(void* p) { 3155 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3156 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3157 } 3158 3159 bool ConcurrentMark::containing_cards_are_marked(void* start, 3160 void* last) { 3161 return containing_card_is_marked(start) && 3162 containing_card_is_marked(last); 3163 } 3164 3165 #ifndef PRODUCT 3166 // for debugging purposes 3167 void ConcurrentMark::print_finger() { 3168 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3169 _heap_start, _heap_end, _finger); 3170 for (int i = 0; i < (int) _max_task_num; ++i) { 3171 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3172 } 3173 gclog_or_tty->print_cr(""); 3174 } 3175 #endif 3176 3177 void CMTask::scan_object(oop obj) { 3178 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3179 3180 if (_cm->verbose_high()) { 3181 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3182 _task_id, (void*) obj); 3183 } 3184 3185 size_t obj_size = obj->size(); 3186 _words_scanned += obj_size; 3187 3188 obj->oop_iterate(_cm_oop_closure); 3189 statsOnly( ++_objs_scanned ); 3190 check_limits(); 3191 } 3192 3193 // Closure for iteration over bitmaps 3194 class CMBitMapClosure : public BitMapClosure { 3195 private: 3196 // the bitmap that is being iterated over 3197 CMBitMap* _nextMarkBitMap; 3198 ConcurrentMark* _cm; 3199 CMTask* _task; 3200 3201 public: 3202 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3203 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3204 3205 bool do_bit(size_t offset) { 3206 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3207 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3208 assert( addr < _cm->finger(), "invariant"); 3209 3210 statsOnly( _task->increase_objs_found_on_bitmap() ); 3211 assert(addr >= _task->finger(), "invariant"); 3212 3213 // We move that task's local finger along. 3214 _task->move_finger_to(addr); 3215 3216 _task->scan_object(oop(addr)); 3217 // we only partially drain the local queue and global stack 3218 _task->drain_local_queue(true); 3219 _task->drain_global_stack(true); 3220 3221 // if the has_aborted flag has been raised, we need to bail out of 3222 // the iteration 3223 return !_task->has_aborted(); 3224 } 3225 }; 3226 3227 // Closure for iterating over objects, currently only used for 3228 // processing SATB buffers. 
3229 class CMObjectClosure : public ObjectClosure { 3230 private: 3231 CMTask* _task; 3232 3233 public: 3234 void do_object(oop obj) { 3235 _task->deal_with_reference(obj); 3236 } 3237 3238 CMObjectClosure(CMTask* task) : _task(task) { } 3239 }; 3240 3241 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3242 ConcurrentMark* cm, 3243 CMTask* task) 3244 : _g1h(g1h), _cm(cm), _task(task) { 3245 assert(_ref_processor == NULL, "should be initialized to NULL"); 3246 3247 if (G1UseConcMarkReferenceProcessing) { 3248 _ref_processor = g1h->ref_processor_cm(); 3249 assert(_ref_processor != NULL, "should not be NULL"); 3250 } 3251 } 3252 3253 void CMTask::setup_for_region(HeapRegion* hr) { 3254 // Separated the asserts so that we know which one fires. 3255 assert(hr != NULL, 3256 "claim_region() should have filtered out continues humongous regions"); 3257 assert(!hr->continuesHumongous(), 3258 "claim_region() should have filtered out continues humongous regions"); 3259 3260 if (_cm->verbose_low()) { 3261 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 3262 _task_id, hr); 3263 } 3264 3265 _curr_region = hr; 3266 _finger = hr->bottom(); 3267 update_region_limit(); 3268 } 3269 3270 void CMTask::update_region_limit() { 3271 HeapRegion* hr = _curr_region; 3272 HeapWord* bottom = hr->bottom(); 3273 HeapWord* limit = hr->next_top_at_mark_start(); 3274 3275 if (limit == bottom) { 3276 if (_cm->verbose_low()) { 3277 gclog_or_tty->print_cr("[%d] found an empty region " 3278 "["PTR_FORMAT", "PTR_FORMAT")", 3279 _task_id, bottom, limit); 3280 } 3281 // The region was collected underneath our feet. 3282 // We set the finger to bottom to ensure that the bitmap 3283 // iteration that will follow this will not do anything. 3284 // (this is not a condition that holds when we set the region up, 3285 // as the region is not supposed to be empty in the first place) 3286 _finger = bottom; 3287 } else if (limit >= _region_limit) { 3288 assert(limit >= _finger, "peace of mind"); 3289 } else { 3290 assert(limit < _region_limit, "only way to get here"); 3291 // This can happen under some pretty unusual circumstances. An 3292 // evacuation pause empties the region underneath our feet (NTAMS 3293 // at bottom). We then do some allocation in the region (NTAMS 3294 // stays at bottom), followed by the region being used as a GC 3295 // alloc region (NTAMS will move to top() and the objects 3296 // originally below it will be grayed). All objects now marked in 3297 // the region are explicitly grayed, if below the global finger, 3298 // and we do not need in fact to scan anything else. So, we simply 3299 // set _finger to be limit to ensure that the bitmap iteration 3300 // doesn't do anything. 3301 _finger = limit; 3302 } 3303 3304 _region_limit = limit; 3305 } 3306 3307 void CMTask::giveup_current_region() { 3308 assert(_curr_region != NULL, "invariant"); 3309 if (_cm->verbose_low()) { 3310 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 3311 _task_id, _curr_region); 3312 } 3313 clear_region_fields(); 3314 } 3315 3316 void CMTask::clear_region_fields() { 3317 // Values for these three fields that indicate that we're not 3318 // holding on to a region. 
3319 _curr_region = NULL; 3320 _finger = NULL; 3321 _region_limit = NULL; 3322 } 3323 3324 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3325 if (cm_oop_closure == NULL) { 3326 assert(_cm_oop_closure != NULL, "invariant"); 3327 } else { 3328 assert(_cm_oop_closure == NULL, "invariant"); 3329 } 3330 _cm_oop_closure = cm_oop_closure; 3331 } 3332 3333 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3334 guarantee(nextMarkBitMap != NULL, "invariant"); 3335 3336 if (_cm->verbose_low()) { 3337 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3338 } 3339 3340 _nextMarkBitMap = nextMarkBitMap; 3341 clear_region_fields(); 3342 3343 _calls = 0; 3344 _elapsed_time_ms = 0.0; 3345 _termination_time_ms = 0.0; 3346 _termination_start_time_ms = 0.0; 3347 3348 #if _MARKING_STATS_ 3349 _local_pushes = 0; 3350 _local_pops = 0; 3351 _local_max_size = 0; 3352 _objs_scanned = 0; 3353 _global_pushes = 0; 3354 _global_pops = 0; 3355 _global_max_size = 0; 3356 _global_transfers_to = 0; 3357 _global_transfers_from = 0; 3358 _regions_claimed = 0; 3359 _objs_found_on_bitmap = 0; 3360 _satb_buffers_processed = 0; 3361 _steal_attempts = 0; 3362 _steals = 0; 3363 _aborted = 0; 3364 _aborted_overflow = 0; 3365 _aborted_cm_aborted = 0; 3366 _aborted_yield = 0; 3367 _aborted_timed_out = 0; 3368 _aborted_satb = 0; 3369 _aborted_termination = 0; 3370 #endif // _MARKING_STATS_ 3371 } 3372 3373 bool CMTask::should_exit_termination() { 3374 regular_clock_call(); 3375 // This is called when we are in the termination protocol. We should 3376 // quit if, for some reason, this task wants to abort or the global 3377 // stack is not empty (this means that we can get work from it). 3378 return !_cm->mark_stack_empty() || has_aborted(); 3379 } 3380 3381 void CMTask::reached_limit() { 3382 assert(_words_scanned >= _words_scanned_limit || 3383 _refs_reached >= _refs_reached_limit , 3384 "shouldn't have been called otherwise"); 3385 regular_clock_call(); 3386 } 3387 3388 void CMTask::regular_clock_call() { 3389 if (has_aborted()) return; 3390 3391 // First, we need to recalculate the words scanned and refs reached 3392 // limits for the next clock call. 3393 recalculate_limits(); 3394 3395 // During the regular clock call we do the following 3396 3397 // (1) If an overflow has been flagged, then we abort. 3398 if (_cm->has_overflown()) { 3399 set_has_aborted(); 3400 return; 3401 } 3402 3403 // If we are not concurrent (i.e. we're doing remark) we don't need 3404 // to check anything else. The other steps are only needed during 3405 // the concurrent marking phase. 3406 if (!concurrent()) return; 3407 3408 // (2) If marking has been aborted for Full GC, then we also abort. 3409 if (_cm->has_aborted()) { 3410 set_has_aborted(); 3411 statsOnly( ++_aborted_cm_aborted ); 3412 return; 3413 } 3414 3415 double curr_time_ms = os::elapsedVTime() * 1000.0; 3416 3417 // (3) If marking stats are enabled, then we update the step history. 
3418 #if _MARKING_STATS_
3419   if (_words_scanned >= _words_scanned_limit) {
3420     ++_clock_due_to_scanning;
3421   }
3422   if (_refs_reached >= _refs_reached_limit) {
3423     ++_clock_due_to_marking;
3424   }
3425
3426   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3427   _interval_start_time_ms = curr_time_ms;
3428   _all_clock_intervals_ms.add(last_interval_ms);
3429
3430   if (_cm->verbose_medium()) {
3431     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3432                            "scanned = %d%s, refs reached = %d%s",
3433                            _task_id, last_interval_ms,
3434                            _words_scanned,
3435                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3436                            _refs_reached,
3437                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3438   }
3439 #endif // _MARKING_STATS_
3440
3441   // (4) We check whether we should yield. If we have to, then we abort.
3442   if (_cm->should_yield()) {
3443     // We should yield. To do this we abort the task. The caller is
3444     // responsible for yielding.
3445     set_has_aborted();
3446     statsOnly( ++_aborted_yield );
3447     return;
3448   }
3449
3450   // (5) We check whether we've reached our time quota. If we have,
3451   // then we abort.
3452   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3453   if (elapsed_time_ms > _time_target_ms) {
3454     set_has_aborted();
3455     _has_timed_out = true;
3456     statsOnly( ++_aborted_timed_out );
3457     return;
3458   }
3459
3460   // (6) Finally, we check whether there are enough completed SATB
3461   // buffers available for processing. If there are, we abort.
3462   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3463   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3464     if (_cm->verbose_low()) {
3465       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3466                              _task_id);
3467     }
3468     // we do need to process SATB buffers, so we'll abort and restart
3469     // the marking task to do so
3470     set_has_aborted();
3471     statsOnly( ++_aborted_satb );
3472     return;
3473   }
3474 }
3475
3476 void CMTask::recalculate_limits() {
3477   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3478   _words_scanned_limit      = _real_words_scanned_limit;
3479
3480   _real_refs_reached_limit  = _refs_reached + refs_reached_period;
3481   _refs_reached_limit       = _real_refs_reached_limit;
3482 }
3483
3484 void CMTask::decrease_limits() {
3485   // This is called when we believe that we're going to do an infrequent
3486   // operation which will increase the per-byte scanned cost (i.e. move
3487   // entries to/from the global stack). It basically tries to decrease the
3488   // scanning limit so that the clock is called earlier.
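  //
  // Illustrative arithmetic only: recalculate_limits() places
  // _words_scanned_limit a full words_scanned_period beyond the point of
  // the last recalculation. Subtracting 3 * words_scanned_period / 4
  // below leaves the limit just words_scanned_period / 4 past that
  // point, so the next regular_clock_call() fires roughly four times
  // sooner than it otherwise would. The refs limit behaves the same way.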
3489
3490   if (_cm->verbose_medium()) {
3491     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3492   }
3493
3494   _words_scanned_limit = _real_words_scanned_limit -
3495     3 * words_scanned_period / 4;
3496   _refs_reached_limit  = _real_refs_reached_limit -
3497     3 * refs_reached_period / 4;
3498 }
3499
3500 void CMTask::move_entries_to_global_stack() {
3501   // local array where we'll store the entries that will be popped
3502   // from the local queue
3503   oop buffer[global_stack_transfer_size];
3504
3505   int n = 0;
3506   oop obj;
3507   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3508     buffer[n] = obj;
3509     ++n;
3510   }
3511
3512   if (n > 0) {
3513     // we popped at least one entry from the local queue
3514
3515     statsOnly( ++_global_transfers_to; _local_pops += n );
3516
3517     if (!_cm->mark_stack_push(buffer, n)) {
3518       if (_cm->verbose_low()) {
3519         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3520                                _task_id);
3521       }
3522       set_has_aborted();
3523     } else {
3524       // the transfer was successful
3525
3526       if (_cm->verbose_medium()) {
3527         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3528                                _task_id, n);
3529       }
3530       statsOnly( int tmp_size = _cm->mark_stack_size();
3531                  if (tmp_size > _global_max_size) {
3532                    _global_max_size = tmp_size;
3533                  }
3534                  _global_pushes += n );
3535     }
3536   }
3537
3538   // this operation was quite expensive, so decrease the limits
3539   decrease_limits();
3540 }
3541
3542 void CMTask::get_entries_from_global_stack() {
3543   // local array where we'll store the entries that will be popped
3544   // from the global stack.
3545   oop buffer[global_stack_transfer_size];
3546   int n;
3547   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3548   assert(n <= global_stack_transfer_size,
3549          "we should not pop more than the given limit");
3550   if (n > 0) {
3551     // yes, we did actually pop at least one entry
3552
3553     statsOnly( ++_global_transfers_from; _global_pops += n );
3554     if (_cm->verbose_medium()) {
3555       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3556                              _task_id, n);
3557     }
3558     for (int i = 0; i < n; ++i) {
3559       bool success = _task_queue->push(buffer[i]);
3560       // We only call this when the local queue is empty or under a
3561       // given target limit. So, we do not expect this push to fail.
3562       assert(success, "invariant");
3563     }
3564
3565     statsOnly( int tmp_size = _task_queue->size();
3566                if (tmp_size > _local_max_size) {
3567                  _local_max_size = tmp_size;
3568                }
3569                _local_pushes += n );
3570   }
3571
3572   // this operation was quite expensive, so decrease the limits
3573   decrease_limits();
3574 }
3575
3576 void CMTask::drain_local_queue(bool partially) {
3577   if (has_aborted()) return;
3578
3579   // Decide what the target size is, depending on whether we're going
3580   // to drain it partially (so that other tasks can steal if they run
3581   // out of things to do) or totally (at the very end).
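  // For example (hypothetical numbers, not the actual defaults): with
  // max_elems() == 16384 and GCDrainStackTargetSize == 24, the partial
  // target below would be MIN2(16384 / 3, 24) == 24, i.e. we would pop
  // and scan entries until at most 24 remain available for other tasks
  // to steal.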
3582   size_t target_size;
3583   if (partially) {
3584     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3585   } else {
3586     target_size = 0;
3587   }
3588
3589   if (_task_queue->size() > target_size) {
3590     if (_cm->verbose_high()) {
3591       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3592                              _task_id, target_size);
3593     }
3594
3595     oop obj;
3596     bool ret = _task_queue->pop_local(obj);
3597     while (ret) {
3598       statsOnly( ++_local_pops );
3599
3600       if (_cm->verbose_high()) {
3601         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3602                                (void*) obj);
3603       }
3604
3605       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3606       assert(!_g1h->is_on_master_free_list(
3607                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3608
3609       scan_object(obj);
3610
3611       if (_task_queue->size() <= target_size || has_aborted()) {
3612         ret = false;
3613       } else {
3614         ret = _task_queue->pop_local(obj);
3615       }
3616     }
3617
3618     if (_cm->verbose_high()) {
3619       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3620                              _task_id, _task_queue->size());
3621     }
3622   }
3623 }
3624
3625 void CMTask::drain_global_stack(bool partially) {
3626   if (has_aborted()) return;
3627
3628   // We have a policy to drain the local queue before we attempt to
3629   // drain the global stack.
3630   assert(partially || _task_queue->size() == 0, "invariant");
3631
3632   // Decide what the target size is, depending on whether we're going
3633   // to drain it partially (so that other tasks can steal if they run
3634   // out of things to do) or totally (at the very end). Notice that,
3635   // because we move entries from the global stack in chunks or
3636   // because another task might be doing the same, we might in fact
3637   // drop below the target. But, this is not a problem.
3638   size_t target_size;
3639   if (partially) {
3640     target_size = _cm->partial_mark_stack_size_target();
3641   } else {
3642     target_size = 0;
3643   }
3644
3645   if (_cm->mark_stack_size() > target_size) {
3646     if (_cm->verbose_low()) {
3647       gclog_or_tty->print_cr("[%d] draining global stack, target size = %d",
3648                              _task_id, target_size);
3649     }
3650
3651     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3652       get_entries_from_global_stack();
3653       drain_local_queue(partially);
3654     }
3655
3656     if (_cm->verbose_low()) {
3657       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3658                              _task_id, _cm->mark_stack_size());
3659     }
3660   }
3661 }
3662
3663 // The SATB queue set makes several assumptions about whether to call
3664 // the par or non-par versions of the methods; this is why some of the
3665 // code is replicated. We should really get rid of the single-threaded
3666 // version of the code to simplify things.
3667 void CMTask::drain_satb_buffers() {
3668   if (has_aborted()) return;
3669
3670   // We set this so that the regular clock knows that we're in the
3671   // middle of draining buffers and doesn't set the abort flag when it
3672   // notices that SATB buffers are available for draining. It'd be
3673   // very counterproductive if it did that. :-)
3674   _draining_satb_buffers = true;
3675
3676   CMObjectClosure oc(this);
3677   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3678   if (G1CollectedHeap::use_parallel_gc_threads()) {
3679     satb_mq_set.set_par_closure(_task_id, &oc);
3680   } else {
3681     satb_mq_set.set_closure(&oc);
3682   }
3683
3684   // This keeps claiming and applying the closure to completed buffers
3685   // until we run out of buffers or we need to abort.
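  // (Both the par and non-par variants return true as long as they
  // claimed and processed a completed buffer, and false once no
  // completed buffers are left, which is what terminates the loops.)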
3686 if (G1CollectedHeap::use_parallel_gc_threads()) { 3687 while (!has_aborted() && 3688 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3689 if (_cm->verbose_medium()) { 3690 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3691 } 3692 statsOnly( ++_satb_buffers_processed ); 3693 regular_clock_call(); 3694 } 3695 } else { 3696 while (!has_aborted() && 3697 satb_mq_set.apply_closure_to_completed_buffer()) { 3698 if (_cm->verbose_medium()) { 3699 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3700 } 3701 statsOnly( ++_satb_buffers_processed ); 3702 regular_clock_call(); 3703 } 3704 } 3705 3706 if (!concurrent() && !has_aborted()) { 3707 // We should only do this during remark. 3708 if (G1CollectedHeap::use_parallel_gc_threads()) { 3709 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3710 } else { 3711 satb_mq_set.iterate_closure_all_threads(); 3712 } 3713 } 3714 3715 _draining_satb_buffers = false; 3716 3717 assert(has_aborted() || 3718 concurrent() || 3719 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3720 3721 if (G1CollectedHeap::use_parallel_gc_threads()) { 3722 satb_mq_set.set_par_closure(_task_id, NULL); 3723 } else { 3724 satb_mq_set.set_closure(NULL); 3725 } 3726 3727 // again, this was a potentially expensive operation, decrease the 3728 // limits to get the regular clock call early 3729 decrease_limits(); 3730 } 3731 3732 void CMTask::print_stats() { 3733 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 3734 _task_id, _calls); 3735 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3736 _elapsed_time_ms, _termination_time_ms); 3737 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3738 _step_times_ms.num(), _step_times_ms.avg(), 3739 _step_times_ms.sd()); 3740 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3741 _step_times_ms.maximum(), _step_times_ms.sum()); 3742 3743 #if _MARKING_STATS_ 3744 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3745 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3746 _all_clock_intervals_ms.sd()); 3747 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3748 _all_clock_intervals_ms.maximum(), 3749 _all_clock_intervals_ms.sum()); 3750 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3751 _clock_due_to_scanning, _clock_due_to_marking); 3752 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3753 _objs_scanned, _objs_found_on_bitmap); 3754 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3755 _local_pushes, _local_pops, _local_max_size); 3756 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3757 _global_pushes, _global_pops, _global_max_size); 3758 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3759 _global_transfers_to,_global_transfers_from); 3760 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3761 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3762 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3763 _steal_attempts, _steals); 3764 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3765 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3766 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3767 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3768 _aborted_timed_out, _aborted_satb, 
_aborted_termination);
3769 #endif // _MARKING_STATS_
3770 }
3771
3772 /*****************************************************************************
3773
3774     The do_marking_step(time_target_ms, ...) method is the building
3775     block of the parallel marking framework. It can be called in parallel
3776     with other invocations of do_marking_step() on different tasks
3777     (but only one per task, obviously) and concurrently with the
3778     mutator threads, or during remark, hence it eliminates the need
3779     for two versions of the code. When called during remark, it will
3780     pick up from where the task left off during the concurrent marking
3781     phase. Interestingly, tasks are also claimable during evacuation
3782     pauses, since do_marking_step() ensures that it aborts before
3783     it needs to yield.
3784
3785     The data structures that it uses to do marking work are the
3786     following:
3787
3788     (1) Marking Bitmap. If there are gray objects that appear only
3789     on the bitmap (this happens either when dealing with an overflow
3790     or when the initial marking phase has simply marked the roots
3791     and didn't push them on the stack), then tasks claim heap
3792     regions whose bitmap they then scan to find gray objects. A
3793     global finger indicates where the end of the last claimed region
3794     is. A local finger indicates how far into the region a task has
3795     scanned. The two fingers are used to determine how to gray an
3796     object (i.e. whether simply marking it is OK, as it will be
3797     visited by a task in the future, or whether it also needs to
3798     be pushed on a stack).
3799
3800     (2) Local Queue. The task's local queue, which the task can
3801     access reasonably efficiently. Other tasks can steal from
3802     it when they run out of work. Throughout the marking phase, a
3803     task attempts to keep its local queue short but not totally
3804     empty, so that entries are available for stealing by other
3805     tasks. Only when there is no more work will a task totally
3806     drain its local queue.
3807
3808     (3) Global Mark Stack. This handles local queue overflow. During
3809     marking only sets of entries are moved between it and the local
3810     queues, as access to it requires a mutex and finer-grained
3811     interaction with it might cause contention. If it
3812     overflows, then the marking phase should restart and iterate
3813     over the bitmap to identify gray objects. Throughout the marking
3814     phase, tasks attempt to keep the global mark stack at a small
3815     length but not totally empty, so that entries are available for
3816     popping by other tasks. Only when there is no more work will
3817     tasks totally drain the global mark stack.
3818
3819     (4) SATB Buffer Queue. This is where completed SATB buffers are
3820     made available. Buffers are regularly removed from this queue
3821     and scanned for roots, so that the queue doesn't get too
3822     long. During remark, all completed buffers are processed, as
3823     well as the filled-in parts of any uncompleted buffers.
3824
3825     The do_marking_step() method tries to abort when the time target
3826     has been reached. There are a few other cases when the
3827     do_marking_step() method also aborts:
3828
3829     (1) When the marking phase has been aborted (after a Full GC).
3830
3831     (2) When a global overflow (on the global stack) has been
3832     triggered. Before the task aborts, it will actually sync up with
3833     the other tasks to ensure that all the marking data structures
3834     (local queues, stacks, fingers etc.) are re-initialised so that
3835     when do_marking_step() completes, the marking phase can
3836     immediately restart.
3837
3838     (3) When enough completed SATB buffers are available. The
3839     do_marking_step() method only tries to drain SATB buffers right
3840     at the beginning. So, if enough buffers are available, the
3841     marking step aborts and the SATB buffers are processed at
3842     the beginning of the next invocation.
3843
3844     (4) To yield. When we have to yield, we abort and do the yield
3845     right at the end of do_marking_step(). This saves us a lot of
3846     hassle, as by yielding we might allow a Full GC to take place.
3847     If this happens then objects will be compacted underneath our
3848     feet, the heap might shrink, etc. We save checking for this by
3849     just aborting and doing the yield right at the end.
3850
3851     From the above it follows that the do_marking_step() method should
3852     be called in a loop (or, otherwise, regularly) until it completes.
3853
3854     If a marking step completes without its has_aborted() flag being
3855     true, it means it has completed the current marking phase (and
3856     also all other marking tasks have done so and have all synced up).
3857
3858     A method called regular_clock_call() is invoked "regularly" (in
3859     sub-ms intervals) throughout marking. It is this clock method that
3860     checks all the abort conditions which were mentioned above and
3861     decides when the task should abort. A work-based scheme is used to
3862     trigger this clock method: it is called when the number of object
3863     words the marking phase has scanned or the number of references
3864     the marking phase has visited reaches a given limit. Additional
3865     invocations of the clock method have been planted in a few other
3866     strategic places too. The initial reason for the clock method was
3867     to avoid calling vtime too regularly, as it is quite expensive. So,
3868     once it was in place, it was natural to piggy-back all the other
3869     conditions on it too and not constantly check them throughout the code.
3870
3871     If do_termination is true then do_marking_step will enter its
3872     termination protocol.
3873
3874     The value of is_serial must be true when do_marking_step is being
3875     called serially (i.e. by the VMThread) and do_marking_step should
3876     skip any synchronization in the termination and overflow code.
3877     Examples include the serial remark code and the serial reference
3878     processing closures.
3879
3880     The value of is_serial must be false when do_marking_step is
3881     being called by any of the worker threads in a work gang.
3882     Examples include the concurrent marking code (CMMarkingTask),
3883     the MT remark code, and the MT reference processing closures.
3884
3885  *****************************************************************************/
3886
3887 void CMTask::do_marking_step(double time_target_ms,
3888                              bool do_termination,
3889                              bool is_serial) {
3890   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3891   assert(concurrent() == _cm->concurrent(), "they should be the same");
3892
3893   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3894   assert(_task_queues != NULL, "invariant");
3895   assert(_task_queue != NULL, "invariant");
3896   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3897
3898   assert(!_claimed,
3899          "only one thread should claim this task at any one time");
3900
3901   // OK, this doesn't safeguard against all possible scenarios, as it is
3902   // possible for two threads to set the _claimed flag at the same
3903   // time.
But it is only for debugging purposes anyway and it will 3904 // catch most problems. 3905 _claimed = true; 3906 3907 _start_time_ms = os::elapsedVTime() * 1000.0; 3908 statsOnly( _interval_start_time_ms = _start_time_ms ); 3909 3910 // If do_stealing is true then do_marking_step will attempt to 3911 // steal work from the other CMTasks. It only makes sense to 3912 // enable stealing when the termination protocol is enabled 3913 // and do_marking_step() is not being called serially. 3914 bool do_stealing = do_termination && !is_serial; 3915 3916 double diff_prediction_ms = 3917 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 3918 _time_target_ms = time_target_ms - diff_prediction_ms; 3919 3920 // set up the variables that are used in the work-based scheme to 3921 // call the regular clock method 3922 _words_scanned = 0; 3923 _refs_reached = 0; 3924 recalculate_limits(); 3925 3926 // clear all flags 3927 clear_has_aborted(); 3928 _has_timed_out = false; 3929 _draining_satb_buffers = false; 3930 3931 ++_calls; 3932 3933 if (_cm->verbose_low()) { 3934 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 3935 "target = %1.2lfms >>>>>>>>>>", 3936 _task_id, _calls, _time_target_ms); 3937 } 3938 3939 // Set up the bitmap and oop closures. Anything that uses them is 3940 // eventually called from this method, so it is OK to allocate these 3941 // statically. 3942 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 3943 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 3944 set_cm_oop_closure(&cm_oop_closure); 3945 3946 if (_cm->has_overflown()) { 3947 // This can happen if the mark stack overflows during a GC pause 3948 // and this task, after a yield point, restarts. We have to abort 3949 // as we need to get into the overflow protocol which happens 3950 // right at the end of this task. 3951 set_has_aborted(); 3952 } 3953 3954 // First drain any available SATB buffers. After this, we will not 3955 // look at SATB buffers before the next invocation of this method. 3956 // If enough completed SATB buffers are queued up, the regular clock 3957 // will abort this task so that it restarts. 3958 drain_satb_buffers(); 3959 // ...then partially drain the local queue and the global stack 3960 drain_local_queue(true); 3961 drain_global_stack(true); 3962 3963 do { 3964 if (!has_aborted() && _curr_region != NULL) { 3965 // This means that we're already holding on to a region. 3966 assert(_finger != NULL, "if region is not NULL, then the finger " 3967 "should not be NULL either"); 3968 3969 // We might have restarted this task after an evacuation pause 3970 // which might have evacuated the region we're holding on to 3971 // underneath our feet. Let's read its limit again to make sure 3972 // that we do not iterate over a region of the heap that 3973 // contains garbage (update_region_limit() will also move 3974 // _finger to the start of the region if it is found empty). 3975 update_region_limit(); 3976 // We will start from _finger not from the start of the region, 3977 // as we might be restarting this task after aborting half-way 3978 // through scanning this region. In this case, _finger points to 3979 // the address where we last found a marked object. If this is a 3980 // fresh region, _finger points to start(). 
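      // Pictorially (illustrative layout; addresses grow to the right):
      //
      //   bottom()           _finger                 _region_limit
      //      |------------------|--------------------------|
      //        already scanned         mr: left to scan
      //
      // mr covers [_finger, _region_limit); any address in it that is
      // marked on the bitmap still needs to be visited.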
3981 MemRegion mr = MemRegion(_finger, _region_limit); 3982 3983 if (_cm->verbose_low()) { 3984 gclog_or_tty->print_cr("[%d] we're scanning part " 3985 "["PTR_FORMAT", "PTR_FORMAT") " 3986 "of region "PTR_FORMAT, 3987 _task_id, _finger, _region_limit, _curr_region); 3988 } 3989 3990 // Let's iterate over the bitmap of the part of the 3991 // region that is left. 3992 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3993 // We successfully completed iterating over the region. Now, 3994 // let's give up the region. 3995 giveup_current_region(); 3996 regular_clock_call(); 3997 } else { 3998 assert(has_aborted(), "currently the only way to do so"); 3999 // The only way to abort the bitmap iteration is to return 4000 // false from the do_bit() method. However, inside the 4001 // do_bit() method we move the _finger to point to the 4002 // object currently being looked at. So, if we bail out, we 4003 // have definitely set _finger to something non-null. 4004 assert(_finger != NULL, "invariant"); 4005 4006 // Region iteration was actually aborted. So now _finger 4007 // points to the address of the object we last scanned. If we 4008 // leave it there, when we restart this task, we will rescan 4009 // the object. It is easy to avoid this. We move the finger by 4010 // enough to point to the next possible object header (the 4011 // bitmap knows by how much we need to move it as it knows its 4012 // granularity). 4013 assert(_finger < _region_limit, "invariant"); 4014 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4015 // Check if bitmap iteration was aborted while scanning the last object 4016 if (new_finger >= _region_limit) { 4017 giveup_current_region(); 4018 } else { 4019 move_finger_to(new_finger); 4020 } 4021 } 4022 } 4023 // At this point we have either completed iterating over the 4024 // region we were holding on to, or we have aborted. 4025 4026 // We then partially drain the local queue and the global stack. 4027 // (Do we really need this?) 4028 drain_local_queue(true); 4029 drain_global_stack(true); 4030 4031 // Read the note on the claim_region() method on why it might 4032 // return NULL with potentially more regions available for 4033 // claiming and why we have to check out_of_regions() to determine 4034 // whether we're done or not. 4035 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4036 // We are going to try to claim a new region. We should have 4037 // given up on the previous one. 4038 // Separated the asserts so that we know which one fires. 4039 assert(_curr_region == NULL, "invariant"); 4040 assert(_finger == NULL, "invariant"); 4041 assert(_region_limit == NULL, "invariant"); 4042 if (_cm->verbose_low()) { 4043 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 4044 } 4045 HeapRegion* claimed_region = _cm->claim_region(_task_id); 4046 if (claimed_region != NULL) { 4047 // Yes, we managed to claim one 4048 statsOnly( ++_regions_claimed ); 4049 4050 if (_cm->verbose_low()) { 4051 gclog_or_tty->print_cr("[%d] we successfully claimed " 4052 "region "PTR_FORMAT, 4053 _task_id, claimed_region); 4054 } 4055 4056 setup_for_region(claimed_region); 4057 assert(_curr_region == claimed_region, "invariant"); 4058 } 4059 // It is important to call the regular clock here. It might take 4060 // a while to claim a region if, for example, we hit a large 4061 // block of empty regions. So we need to call the regular clock 4062 // method once round the loop to make sure it's called 4063 // frequently enough. 
4064       regular_clock_call();
4065     }
4066
4067     if (!has_aborted() && _curr_region == NULL) {
4068       assert(_cm->out_of_regions(),
4069              "at this point we should be out of regions");
4070     }
4071   } while (_curr_region != NULL && !has_aborted());
4072
4073   if (!has_aborted()) {
4074     // We cannot check whether the global stack is empty, since other
4075     // tasks might be pushing objects to it concurrently.
4076     assert(_cm->out_of_regions(),
4077            "at this point we should be out of regions");
4078
4079     if (_cm->verbose_low()) {
4080       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4081     }
4082
4083     // Try to reduce the number of available SATB buffers so that
4084     // remark has less work to do.
4085     drain_satb_buffers();
4086   }
4087
4088   // Since we've done everything else, we can now totally drain the
4089   // local queue and global stack.
4090   drain_local_queue(false);
4091   drain_global_stack(false);
4092
4093   // Attempt at work stealing from other tasks' queues.
4094   if (do_stealing && !has_aborted()) {
4095     // We have not aborted. This means that we have finished all that
4096     // we could. Let's try to do some stealing...
4097
4098     // We cannot check whether the global stack is empty, since other
4099     // tasks might be pushing objects to it concurrently.
4100     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4101            "only way to reach here");
4102
4103     if (_cm->verbose_low()) {
4104       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4105     }
4106
4107     while (!has_aborted()) {
4108       oop obj;
4109       statsOnly( ++_steal_attempts );
4110
4111       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4112         if (_cm->verbose_medium()) {
4113           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4114                                  _task_id, (void*) obj);
4115         }
4116
4117         statsOnly( ++_steals );
4118
4119         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4120                "any stolen object should be marked");
4121         scan_object(obj);
4122
4123         // And since we're towards the end, let's totally drain the
4124         // local queue and global stack.
4125         drain_local_queue(false);
4126         drain_global_stack(false);
4127       } else {
4128         break;
4129       }
4130     }
4131   }
4132
4133   // If we are about to wrap up and go into termination, check if we
4134   // should raise the overflow flag.
4135   if (do_termination && !has_aborted()) {
4136     if (_cm->force_overflow()->should_force()) {
4137       _cm->set_has_overflown();
4138       regular_clock_call();
4139     }
4140   }
4141
4142   // We still haven't aborted. Now, let's try to get into the
4143   // termination protocol.
4144   if (do_termination && !has_aborted()) {
4145     // We cannot check whether the global stack is empty, since other
4146     // tasks might be concurrently pushing objects on it.
4147     // Separated the asserts so that we know which one fires.
4148     assert(_cm->out_of_regions(), "only way to reach here");
4149     assert(_task_queue->size() == 0, "only way to reach here");
4150
4151     if (_cm->verbose_low()) {
4152       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4153     }
4154
4155     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4156
4157     // The CMTask class also extends the TerminatorTerminator class,
4158     // hence its should_exit_termination() method will also decide
4159     // whether to exit the termination protocol or not.
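    // Roughly: offer_termination() returns true only once every task
    // has offered termination; while a task waits inside it,
    // should_exit_termination() (see above) is consulted so that the
    // task can leave the protocol early if it has aborted or if
    // entries have reappeared on the global stack.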
4160 bool finished = (is_serial || 4161 _cm->terminator()->offer_termination(this)); 4162 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4163 _termination_time_ms += 4164 termination_end_time_ms - _termination_start_time_ms; 4165 4166 if (finished) { 4167 // We're all done. 4168 4169 if (_task_id == 0) { 4170 // let's allow task 0 to do this 4171 if (concurrent()) { 4172 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4173 // we need to set this to false before the next 4174 // safepoint. This way we ensure that the marking phase 4175 // doesn't observe any more heap expansions. 4176 _cm->clear_concurrent_marking_in_progress(); 4177 } 4178 } 4179 4180 // We can now guarantee that the global stack is empty, since 4181 // all other tasks have finished. We separated the guarantees so 4182 // that, if a condition is false, we can immediately find out 4183 // which one. 4184 guarantee(_cm->out_of_regions(), "only way to reach here"); 4185 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4186 guarantee(_task_queue->size() == 0, "only way to reach here"); 4187 guarantee(!_cm->has_overflown(), "only way to reach here"); 4188 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4189 4190 if (_cm->verbose_low()) { 4191 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4192 } 4193 } else { 4194 // Apparently there's more work to do. Let's abort this task. It 4195 // will restart it and we can hopefully find more things to do. 4196 4197 if (_cm->verbose_low()) { 4198 gclog_or_tty->print_cr("[%d] apparently there is more work to do", 4199 _task_id); 4200 } 4201 4202 set_has_aborted(); 4203 statsOnly( ++_aborted_termination ); 4204 } 4205 } 4206 4207 // Mainly for debugging purposes to make sure that a pointer to the 4208 // closure which was statically allocated in this frame doesn't 4209 // escape it by accident. 4210 set_cm_oop_closure(NULL); 4211 double end_time_ms = os::elapsedVTime() * 1000.0; 4212 double elapsed_time_ms = end_time_ms - _start_time_ms; 4213 // Update the step history. 4214 _step_times_ms.add(elapsed_time_ms); 4215 4216 if (has_aborted()) { 4217 // The task was aborted for some reason. 4218 4219 statsOnly( ++_aborted ); 4220 4221 if (_has_timed_out) { 4222 double diff_ms = elapsed_time_ms - _time_target_ms; 4223 // Keep statistics of how well we did with respect to hitting 4224 // our target only if we actually timed out (if we aborted for 4225 // other reasons, then the results might get skewed). 4226 _marking_step_diffs_ms.add(diff_ms); 4227 } 4228 4229 if (_cm->has_overflown()) { 4230 // This is the interesting one. We aborted because a global 4231 // overflow was raised. This means we have to restart the 4232 // marking phase and start iterating over regions. However, in 4233 // order to do this we have to make sure that all tasks stop 4234 // what they are doing and re-initialise in a safe manner. We 4235 // will achieve this with the use of two barrier sync points. 4236 4237 if (_cm->verbose_low()) { 4238 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4239 } 4240 4241 if (!is_serial) { 4242 // We only need to enter the sync barrier if being called 4243 // from a parallel context 4244 _cm->enter_first_sync_barrier(_task_id); 4245 4246 // When we exit this sync barrier we know that all tasks have 4247 // stopped doing marking work. So, it's now safe to 4248 // re-initialise our data structures. At the end of this method, 4249 // task 0 will clear the global data structures. 
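        // (It is the second sync barrier, entered further below once
        // the local state has been cleared, that stops any task from
        // restarting before every task has finished re-initialising.)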
4250       }
4251
4252       statsOnly( ++_aborted_overflow );
4253
4254       // We clear the local state of this task...
4255       clear_region_fields();
4256
4257       if (!is_serial) {
4258         // ...and enter the second barrier.
4259         _cm->enter_second_sync_barrier(_task_id);
4260       }
4261       // At this point everything has been re-initialised and we're
4262       // ready to restart.
4263     }
4264
4265     if (_cm->verbose_low()) {
4266       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4267                              "elapsed = %1.2lfms <<<<<<<<<<",
4268                              _task_id, _time_target_ms, elapsed_time_ms);
4269       if (_cm->has_aborted()) {
4270         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4271                                _task_id);
4272       }
4273     }
4274   } else {
4275     if (_cm->verbose_low()) {
4276       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4277                              "elapsed = %1.2lfms <<<<<<<<<<",
4278                              _task_id, _time_target_ms, elapsed_time_ms);
4279     }
4280   }
4281
4282   _claimed = false;
4283 }
4284
4285 CMTask::CMTask(int task_id,
4286                ConcurrentMark* cm,
4287                size_t* marked_bytes,
4288                BitMap* card_bm,
4289                CMTaskQueue* task_queue,
4290                CMTaskQueueSet* task_queues)
4291   : _g1h(G1CollectedHeap::heap()),
4292     _task_id(task_id), _cm(cm),
4293     _claimed(false),
4294     _nextMarkBitMap(NULL), _hash_seed(17),
4295     _task_queue(task_queue),
4296     _task_queues(task_queues),
4297     _cm_oop_closure(NULL),
4298     _marked_bytes_array(marked_bytes),
4299     _card_bm(card_bm) {
4300   guarantee(task_queue != NULL, "invariant");
4301   guarantee(task_queues != NULL, "invariant");
4302
4303   statsOnly( _clock_due_to_scanning = 0;
4304              _clock_due_to_marking  = 0 );
4305
4306   _marking_step_diffs_ms.add(0.5);
4307 }
4308
4309 // These are formatting macros that are used below to ensure
4310 // consistent formatting. The *_H_* versions are used to format the
4311 // header for a particular value and they should be kept consistent
4312 // with the corresponding macro. Also note that most of the macros add
4313 // the necessary white space (as a prefix) which makes them a bit
4314 // easier to compose.
4315
4316 // All the output lines are prefixed with this string to be able to
4317 // identify them easily in a large log file.
4318 #define G1PPRL_LINE_PREFIX "###"
4319
4320 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4321 #ifdef _LP64
4322 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4323 #else // _LP64
4324 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4325 #endif // _LP64
4326
4327 // For per-region info
4328 #define G1PPRL_TYPE_FORMAT " %-4s"
4329 #define G1PPRL_TYPE_H_FORMAT " %4s"
4330 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4331 #define G1PPRL_BYTE_H_FORMAT " %9s"
4332 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4333 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4334
4335 // For summary info
4336 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4337 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4338 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4339 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4340
4341 G1PrintRegionLivenessInfoClosure::
4342 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4343   : _out(out),
4344     _total_used_bytes(0), _total_capacity_bytes(0),
4345     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4346     _hum_used_bytes(0), _hum_capacity_bytes(0),
4347     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4348   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4349   MemRegion g1_committed = g1h->g1_committed();
4350   MemRegion g1_reserved = g1h->g1_reserved();
4351   double now = os::elapsedTime();
4352
4353   // Print the header of the output.
4354   _out->cr();
4355   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4356   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4357                  G1PPRL_SUM_ADDR_FORMAT("committed")
4358                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4359                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4360                  g1_committed.start(), g1_committed.end(),
4361                  g1_reserved.start(), g1_reserved.end(),
4362                  HeapRegion::GrainBytes);
4363   _out->print_cr(G1PPRL_LINE_PREFIX);
4364   _out->print_cr(G1PPRL_LINE_PREFIX
4365                  G1PPRL_TYPE_H_FORMAT
4366                  G1PPRL_ADDR_BASE_H_FORMAT
4367                  G1PPRL_BYTE_H_FORMAT
4368                  G1PPRL_BYTE_H_FORMAT
4369                  G1PPRL_BYTE_H_FORMAT
4370                  G1PPRL_DOUBLE_H_FORMAT,
4371                  "type", "address-range",
4372                  "used", "prev-live", "next-live", "gc-eff");
4373   _out->print_cr(G1PPRL_LINE_PREFIX
4374                  G1PPRL_TYPE_H_FORMAT
4375                  G1PPRL_ADDR_BASE_H_FORMAT
4376                  G1PPRL_BYTE_H_FORMAT
4377                  G1PPRL_BYTE_H_FORMAT
4378                  G1PPRL_BYTE_H_FORMAT
4379                  G1PPRL_DOUBLE_H_FORMAT,
4380                  "", "",
4381                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4382 }
4383
4384 // It takes as a parameter a reference to one of the _hum_* fields; it
4385 // deduces the corresponding value for a region in a humongous region
4386 // series (either the region size, or what's left if the _hum_* field
4387 // is < the region size), and updates the _hum_* field accordingly.
4388 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4389   size_t bytes = 0;
4390   // The > 0 check is to deal with the prev and next live bytes which
4391   // could be 0.
4392   if (*hum_bytes > 0) {
4393     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4394     *hum_bytes -= bytes;
4395   }
4396   return bytes;
4397 }
4398
4399 // It deduces the values for a region in a humongous region series
4400 // from the _hum_* fields and updates those accordingly. It assumes
4401 // that the _hum_* fields have already been set up from the "starts
4402 // humongous" region and we visit the regions in address order.
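// For instance (illustrative): if the "starts humongous" region seeds
// _hum_used_bytes with 2.5 regions' worth of bytes, the call for that
// region returns a full GrainBytes, the call for the first "continues
// humongous" region returns another GrainBytes, and the call for the
// second returns the remaining half region, leaving the field at zero.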
4403 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4404 size_t* capacity_bytes, 4405 size_t* prev_live_bytes, 4406 size_t* next_live_bytes) { 4407 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4408 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4409 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4410 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4411 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4412 } 4413 4414 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4415 const char* type = ""; 4416 HeapWord* bottom = r->bottom(); 4417 HeapWord* end = r->end(); 4418 size_t capacity_bytes = r->capacity(); 4419 size_t used_bytes = r->used(); 4420 size_t prev_live_bytes = r->live_bytes(); 4421 size_t next_live_bytes = r->next_live_bytes(); 4422 double gc_eff = r->gc_efficiency(); 4423 if (r->used() == 0) { 4424 type = "FREE"; 4425 } else if (r->is_survivor()) { 4426 type = "SURV"; 4427 } else if (r->is_young()) { 4428 type = "EDEN"; 4429 } else if (r->startsHumongous()) { 4430 type = "HUMS"; 4431 4432 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4433 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4434 "they should have been zeroed after the last time we used them"); 4435 // Set up the _hum_* fields. 4436 _hum_capacity_bytes = capacity_bytes; 4437 _hum_used_bytes = used_bytes; 4438 _hum_prev_live_bytes = prev_live_bytes; 4439 _hum_next_live_bytes = next_live_bytes; 4440 get_hum_bytes(&used_bytes, &capacity_bytes, 4441 &prev_live_bytes, &next_live_bytes); 4442 end = bottom + HeapRegion::GrainWords; 4443 } else if (r->continuesHumongous()) { 4444 type = "HUMC"; 4445 get_hum_bytes(&used_bytes, &capacity_bytes, 4446 &prev_live_bytes, &next_live_bytes); 4447 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4448 } else { 4449 type = "OLD"; 4450 } 4451 4452 _total_used_bytes += used_bytes; 4453 _total_capacity_bytes += capacity_bytes; 4454 _total_prev_live_bytes += prev_live_bytes; 4455 _total_next_live_bytes += next_live_bytes; 4456 4457 // Print a line for this particular region. 4458 _out->print_cr(G1PPRL_LINE_PREFIX 4459 G1PPRL_TYPE_FORMAT 4460 G1PPRL_ADDR_BASE_FORMAT 4461 G1PPRL_BYTE_FORMAT 4462 G1PPRL_BYTE_FORMAT 4463 G1PPRL_BYTE_FORMAT 4464 G1PPRL_DOUBLE_FORMAT, 4465 type, bottom, end, 4466 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4467 4468 return false; 4469 } 4470 4471 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4472 // Print the footer of the output. 4473 _out->print_cr(G1PPRL_LINE_PREFIX); 4474 _out->print_cr(G1PPRL_LINE_PREFIX 4475 " SUMMARY" 4476 G1PPRL_SUM_MB_FORMAT("capacity") 4477 G1PPRL_SUM_MB_PERC_FORMAT("used") 4478 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4479 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4480 bytes_to_mb(_total_capacity_bytes), 4481 bytes_to_mb(_total_used_bytes), 4482 perc(_total_used_bytes, _total_capacity_bytes), 4483 bytes_to_mb(_total_prev_live_bytes), 4484 perc(_total_prev_live_bytes, _total_capacity_bytes), 4485 bytes_to_mb(_total_next_live_bytes), 4486 perc(_total_next_live_bytes, _total_capacity_bytes)); 4487 _out->cr(); 4488 }
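// For reference, G1PrintRegionLivenessInfoClosure produces output of
// roughly this shape (values and column widths are illustrative only;
// the exact layout is governed by the G1PPRL_* formats above):
//
// ### PHASE <phase_name> @ 1.234
// ### HEAP  committed: <addr>-<addr>  reserved: <addr>-<addr>  region-size: 1048576
// ###
// ###   type         address-range       used  prev-live  next-live      gc-eff
// ###                                  (bytes)    (bytes)    (bytes)  (bytes/ms)
// ### <one line per region>
// ###
// ###  SUMMARY  capacity: <n> MB  used: <n> MB / <n> %  prev-live: ...  next-live: ...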