/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/g1/stringDedup.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
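  // Each bit in the map covers (1 << _shifter) heap words, so a marked
  // object can only start at an address aligned to
  // HeapWordSize << _shifter bytes. As an illustration (assuming a
  // 32-bit VM, where _shifter is 1 and each bit covers two 4-byte
  // words): an addr of 0x1004 rounds up to 0x1008, the next possible
  // object boundary.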
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
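  // As a rough sizing illustration (not normative): on a 64-bit VM,
  // where MinObjAlignment is one 8-byte word and _shifter is 0, the
  // bitmap has one bit per heap word, so a 1 GB heap (2^27 words)
  // needs a 2^27-bit, i.e. 16 MB, backing store per marking bitmap.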
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
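  // The policy below is to double _capacity, clamped to
  // MarkStackSizeMax, and to release the old backing store only after
  // the larger reservation has succeeded; if reservation fails we keep
  // the current stack and just note the fact when verbose logging is on.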
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
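  // Unlike par_push() and par_adjoin_arr() above, no CAS is needed
  // here: ParGCRareEvent_lock serializes writers, so a plain update
  // of _index followed by the element stores is sufficient.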
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
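  // Claiming is double-checked: read _next_survivor once without the
  // lock as a cheap filter, then re-read it under RootRegionScan_lock
  // before advancing it, since another worker may have claimed the
  // region in the meantime.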
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(log2_intptr(MinObjAlignment)),
  _markBitMap2(log2_intptr(MinObjAlignment)),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
            "than ParallelGCThreads (" UINT32_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
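      // scale_parallel_threads() (see above) computes
      // MAX2((n + 2) / 4, 1U); for example, ParallelGCThreads = 8
      // yields (8 + 2) / 4 = 2 marking threads, and small values
      // still yield at least one thread.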
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
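    // Only values that came from the command line are checked here;
    // the ergonomic default computed in the branch above has already
    // been validated against [1, MarkStackSizeMax].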
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions,
  // which will take place at the end of evacuation pauses, when the
  // tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() &&
               the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing is made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in the heap, obj_end
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
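    // Note that marked_bytes only accounts for objects below ntams;
    // the allocated-since-marking portion [ntams, top) was handled
    // above and sets the region bit independently.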
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
1652 if (G1CollectedHeap::use_parallel_gc_threads()) { 1653 assert( _g1h->workers()->active_workers() > 0, 1654 "Should have been previously set"); 1655 _n_workers = _g1h->workers()->active_workers(); 1656 } else { 1657 _n_workers = 1; 1658 } 1659 1660 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1661 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1662 1663 _verbose = _cm->verbose_medium(); 1664 } 1665 1666 void work(uint worker_id) { 1667 assert(worker_id < _n_workers, "invariant"); 1668 1669 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1670 _actual_region_bm, _actual_card_bm, 1671 _expected_region_bm, 1672 _expected_card_bm, 1673 _verbose); 1674 1675 if (G1CollectedHeap::use_parallel_gc_threads()) { 1676 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1677 worker_id, 1678 _n_workers, 1679 HeapRegion::VerifyCountClaimValue); 1680 } else { 1681 _g1h->heap_region_iterate(&verify_cl); 1682 } 1683 1684 Atomic::add(verify_cl.failures(), &_failures); 1685 } 1686 1687 int failures() const { return _failures; } 1688 }; 1689 1690 // Closure that finalizes the liveness counting data. 1691 // Used during the cleanup pause. 1692 // Sets the bits corresponding to the interval [NTAMS, top) 1693 // (which contains the implicitly live objects) in the 1694 // card liveness bitmap. Also sets the bit for each region 1695 // containing live data in the region liveness bitmap. 1696 1697 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1698 public: 1699 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1700 BitMap* region_bm, 1701 BitMap* card_bm) : 1702 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1703 1704 bool doHeapRegion(HeapRegion* hr) { 1705 1706 if (hr->continuesHumongous()) { 1707 // We will ignore these here and process them when their 1708 // associated "starts humongous" region is processed (see 1709 // set_bit_for_heap_region()). Note that we cannot rely on their 1710 // associated "starts humongous" region to have their bit set to 1711 // 1 since, due to the region chunking in the parallel region 1712 // iteration, a "continues humongous" region might be visited 1713 // before its associated "starts humongous". 1714 return false; 1715 } 1716 1717 HeapWord* ntams = hr->next_top_at_mark_start(); 1718 HeapWord* top = hr->top(); 1719 1720 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1721 1722 // Mark the allocated-since-marking portion... 1723 if (ntams < top) { 1724 // This definitely means the region has live objects. 1725 set_bit_for_region(hr); 1726 1727 // Now set the bits in the card bitmap for [ntams, top) 1728 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1729 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1730 1731 // Note: if we're looking at the last region in the heap - top 1732 // could actually be just beyond the end of the heap; end_idx 1733 // will then correspond to a (non-existent) card that is also 1734 // just beyond the heap.
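// For illustration of the alignment adjustment below (hypothetical
// numbers; assuming the usual 512-byte G1 cards, i.e. a card shift of 9):
// if top lies 0x1234 bytes past the bottom of the heap, it falls within
// card 0x1234 >> 9 = 9. Since 0x1234 is not card aligned, end_idx is then
// 9 and the half-open card range [start_idx, end_idx) would stop short of
// the card containing the last words below ntams..top; the increment
// below extends the range to cover it.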
1735 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1736 // end of object is not card aligned - increment to cover 1737 // all the cards spanned by the object 1738 end_idx += 1; 1739 } 1740 1741 assert(end_idx <= _card_bm->size(), 1742 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1743 end_idx, _card_bm->size())); 1744 assert(start_idx < _card_bm->size(), 1745 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1746 start_idx, _card_bm->size())); 1747 1748 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1749 } 1750 1751 // Set the bit for the region if it contains live data 1752 if (hr->next_marked_bytes() > 0) { 1753 set_bit_for_region(hr); 1754 } 1755 1756 return false; 1757 } 1758 }; 1759 1760 class G1ParFinalCountTask: public AbstractGangTask { 1761 protected: 1762 G1CollectedHeap* _g1h; 1763 ConcurrentMark* _cm; 1764 BitMap* _actual_region_bm; 1765 BitMap* _actual_card_bm; 1766 1767 uint _n_workers; 1768 1769 public: 1770 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1771 : AbstractGangTask("G1 final counting"), 1772 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1773 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1774 _n_workers(0) { 1775 // Use the value already set as the number of active threads 1776 // in the call to run_task(). 1777 if (G1CollectedHeap::use_parallel_gc_threads()) { 1778 assert( _g1h->workers()->active_workers() > 0, 1779 "Should have been previously set"); 1780 _n_workers = _g1h->workers()->active_workers(); 1781 } else { 1782 _n_workers = 1; 1783 } 1784 } 1785 1786 void work(uint worker_id) { 1787 assert(worker_id < _n_workers, "invariant"); 1788 1789 FinalCountDataUpdateClosure final_update_cl(_g1h, 1790 _actual_region_bm, 1791 _actual_card_bm); 1792 1793 if (G1CollectedHeap::use_parallel_gc_threads()) { 1794 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1795 worker_id, 1796 _n_workers, 1797 HeapRegion::FinalCountClaimValue); 1798 } else { 1799 _g1h->heap_region_iterate(&final_update_cl); 1800 } 1801 } 1802 }; 1803 1804 class G1ParNoteEndTask; 1805 1806 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1807 G1CollectedHeap* _g1; 1808 int _worker_num; 1809 size_t _max_live_bytes; 1810 uint _regions_claimed; 1811 size_t _freed_bytes; 1812 FreeRegionList* _local_cleanup_list; 1813 OldRegionSet* _old_proxy_set; 1814 HumongousRegionSet* _humongous_proxy_set; 1815 HRRSCleanupTask* _hrrs_cleanup_task; 1816 double _claimed_region_time; 1817 double _max_region_time; 1818 1819 public: 1820 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1821 int worker_num, 1822 FreeRegionList* local_cleanup_list, 1823 OldRegionSet* old_proxy_set, 1824 HumongousRegionSet* humongous_proxy_set, 1825 HRRSCleanupTask* hrrs_cleanup_task) : 1826 _g1(g1), _worker_num(worker_num), 1827 _max_live_bytes(0), _regions_claimed(0), 1828 _freed_bytes(0), 1829 _claimed_region_time(0.0), _max_region_time(0.0), 1830 _local_cleanup_list(local_cleanup_list), 1831 _old_proxy_set(old_proxy_set), 1832 _humongous_proxy_set(humongous_proxy_set), 1833 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1834 1835 size_t freed_bytes() { return _freed_bytes; } 1836 1837 bool doHeapRegion(HeapRegion *hr) { 1838 if (hr->continuesHumongous()) { 1839 return false; 1840 } 1841 // We use a claim value of zero here because all regions 1842 // were claimed with value 1 in the FinalCount task. 
1843 _g1->reset_gc_time_stamps(hr); 1844 double start = os::elapsedTime(); 1845 _regions_claimed++; 1846 hr->note_end_of_marking(); 1847 _max_live_bytes += hr->max_live_bytes(); 1848 _g1->free_region_if_empty(hr, 1849 &_freed_bytes, 1850 _local_cleanup_list, 1851 _old_proxy_set, 1852 _humongous_proxy_set, 1853 _hrrs_cleanup_task, 1854 true /* par */); 1855 double region_time = (os::elapsedTime() - start); 1856 _claimed_region_time += region_time; 1857 if (region_time > _max_region_time) { 1858 _max_region_time = region_time; 1859 } 1860 return false; 1861 } 1862 1863 size_t max_live_bytes() { return _max_live_bytes; } 1864 uint regions_claimed() { return _regions_claimed; } 1865 double claimed_region_time_sec() { return _claimed_region_time; } 1866 double max_region_time_sec() { return _max_region_time; } 1867 }; 1868 1869 class G1ParNoteEndTask: public AbstractGangTask { 1870 friend class G1NoteEndOfConcMarkClosure; 1871 1872 protected: 1873 G1CollectedHeap* _g1h; 1874 size_t _max_live_bytes; 1875 size_t _freed_bytes; 1876 FreeRegionList* _cleanup_list; 1877 1878 public: 1879 G1ParNoteEndTask(G1CollectedHeap* g1h, 1880 FreeRegionList* cleanup_list) : 1881 AbstractGangTask("G1 note end"), _g1h(g1h), 1882 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1883 1884 void work(uint worker_id) { 1885 double start = os::elapsedTime(); 1886 FreeRegionList local_cleanup_list("Local Cleanup List"); 1887 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1888 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1889 HRRSCleanupTask hrrs_cleanup_task; 1890 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1891 &old_proxy_set, 1892 &humongous_proxy_set, 1893 &hrrs_cleanup_task); 1894 if (G1CollectedHeap::use_parallel_gc_threads()) { 1895 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1896 _g1h->workers()->active_workers(), 1897 HeapRegion::NoteEndClaimValue); 1898 } else { 1899 _g1h->heap_region_iterate(&g1_note_end); 1900 } 1901 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1902 1903 // Now update the lists 1904 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1905 NULL /* free_list */, 1906 &old_proxy_set, 1907 &humongous_proxy_set, 1908 true /* par */); 1909 { 1910 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1911 _max_live_bytes += g1_note_end.max_live_bytes(); 1912 _freed_bytes += g1_note_end.freed_bytes(); 1913 1914 // If we were to iterate over the global cleanup list at the end of 1915 // cleanup to do this printing, we could not guarantee that we only 1916 // generate output for the newly-reclaimed regions (the list 1917 // might not be empty at the beginning of cleanup; we might 1918 // still be working on its previous contents). So we do the 1919 // printing here, before we append the new regions to the global 1920 // cleanup list.
1921 1922 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1923 if (hr_printer->is_active()) { 1924 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1925 while (iter.more_available()) { 1926 HeapRegion* hr = iter.get_next(); 1927 hr_printer->cleanup(hr); 1928 } 1929 } 1930 1931 _cleanup_list->add_as_tail(&local_cleanup_list); 1932 assert(local_cleanup_list.is_empty(), "post-condition"); 1933 1934 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1935 } 1936 } 1937 size_t max_live_bytes() { return _max_live_bytes; } 1938 size_t freed_bytes() { return _freed_bytes; } 1939 }; 1940 1941 class G1ParScrubRemSetTask: public AbstractGangTask { 1942 protected: 1943 G1RemSet* _g1rs; 1944 BitMap* _region_bm; 1945 BitMap* _card_bm; 1946 public: 1947 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1948 BitMap* region_bm, BitMap* card_bm) : 1949 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1950 _region_bm(region_bm), _card_bm(card_bm) { } 1951 1952 void work(uint worker_id) { 1953 if (G1CollectedHeap::use_parallel_gc_threads()) { 1954 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1955 HeapRegion::ScrubRemSetClaimValue); 1956 } else { 1957 _g1rs->scrub(_region_bm, _card_bm); 1958 } 1959 } 1960 1961 }; 1962 1963 void ConcurrentMark::cleanup() { 1964 // world is stopped at this checkpoint 1965 assert(SafepointSynchronize::is_at_safepoint(), 1966 "world should be stopped"); 1967 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1968 1969 // If a full collection has happened, we shouldn't do this. 1970 if (has_aborted()) { 1971 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1972 return; 1973 } 1974 1975 HRSPhaseSetter x(HRSPhaseCleanup); 1976 g1h->verify_region_sets_optional(); 1977 1978 if (VerifyDuringGC) { 1979 HandleMark hm; // handle scope 1980 Universe::heap()->prepare_for_verify(); 1981 Universe::verify(VerifyOption_G1UsePrevMarking, 1982 " VerifyDuringGC:(before)"); 1983 } 1984 1985 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1986 g1p->record_concurrent_mark_cleanup_start(); 1987 1988 double start = os::elapsedTime(); 1989 1990 HeapRegionRemSet::reset_for_cleanup_tasks(); 1991 1992 uint n_workers; 1993 1994 // Do counting once more with the world stopped for good measure. 1995 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1996 1997 if (G1CollectedHeap::use_parallel_gc_threads()) { 1998 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1999 "sanity check"); 2000 2001 g1h->set_par_threads(); 2002 n_workers = g1h->n_par_threads(); 2003 assert(g1h->n_par_threads() == n_workers, 2004 "Should not have been reset"); 2005 g1h->workers()->run_task(&g1_par_count_task); 2006 // Done with the parallel phase so reset to 0. 2007 g1h->set_par_threads(0); 2008 2009 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2010 "sanity check"); 2011 } else { 2012 n_workers = 1; 2013 g1_par_count_task.work(0); 2014 } 2015 2016 if (VerifyDuringGC) { 2017 // Verify that the counting data accumulated during marking matches 2018 // that calculated by walking the marking bitmap. 
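// The expected values are recomputed from scratch here:
// G1ParVerifyFinalCountTask applies VerifyLiveObjectDataHRClosure (and,
// through it, CalcLiveObjectsClosure) to walk the next marking bitmap
// into the two 'expected' bitmaps allocated below, and then compares
// those, bit by bit, against the actual counting data.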
2019 2020 // Bitmaps to hold expected values 2021 BitMap expected_region_bm(_region_bm.size(), false); 2022 BitMap expected_card_bm(_card_bm.size(), false); 2023 2024 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2025 &_region_bm, 2026 &_card_bm, 2027 &expected_region_bm, 2028 &expected_card_bm); 2029 2030 if (G1CollectedHeap::use_parallel_gc_threads()) { 2031 g1h->set_par_threads((int)n_workers); 2032 g1h->workers()->run_task(&g1_par_verify_task); 2033 // Done with the parallel phase so reset to 0. 2034 g1h->set_par_threads(0); 2035 2036 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2037 "sanity check"); 2038 } else { 2039 g1_par_verify_task.work(0); 2040 } 2041 2042 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2043 } 2044 2045 size_t start_used_bytes = g1h->used(); 2046 g1h->set_marking_complete(); 2047 2048 double count_end = os::elapsedTime(); 2049 double this_final_counting_time = (count_end - start); 2050 _total_counting_time += this_final_counting_time; 2051 2052 if (G1PrintRegionLivenessInfo) { 2053 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2054 _g1h->heap_region_iterate(&cl); 2055 } 2056 2057 // Install newly created mark bitmap as "prev". 2058 swapMarkBitMaps(); 2059 2060 g1h->reset_gc_time_stamp(); 2061 2062 // Note end of marking in all heap regions. 2063 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2064 if (G1CollectedHeap::use_parallel_gc_threads()) { 2065 g1h->set_par_threads((int)n_workers); 2066 g1h->workers()->run_task(&g1_par_note_end_task); 2067 g1h->set_par_threads(0); 2068 2069 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2070 "sanity check"); 2071 } else { 2072 g1_par_note_end_task.work(0); 2073 } 2074 g1h->check_gc_time_stamps(); 2075 2076 if (!cleanup_list_is_empty()) { 2077 // The cleanup list is not empty, so we'll have to process it 2078 // concurrently. Notify anyone else that might be wanting free 2079 // regions that there will be more free regions coming soon. 2080 g1h->set_free_regions_coming(); 2081 } 2082 2083 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end() 2084 // call below, since scrubbing affects the metric by which we sort the heap regions. 2085 if (G1ScrubRemSets) { 2086 double rs_scrub_start = os::elapsedTime(); 2087 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2088 if (G1CollectedHeap::use_parallel_gc_threads()) { 2089 g1h->set_par_threads((int)n_workers); 2090 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2091 g1h->set_par_threads(0); 2092 2093 assert(g1h->check_heap_region_claim_values( 2094 HeapRegion::ScrubRemSetClaimValue), 2095 "sanity check"); 2096 } else { 2097 g1_par_scrub_rs_task.work(0); 2098 } 2099 2100 double rs_scrub_end = os::elapsedTime(); 2101 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2102 _total_rs_scrub_time += this_rs_scrub_time; 2103 } 2104 2105 // this will also free any regions totally full of garbage objects, 2106 // and sort the regions. 2107 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2108 2109 // Statistics. 2110 double end = os::elapsedTime(); 2111 _cleanup_times.add((end - start) * 1000.0); 2112 2113 if (G1Log::fine()) { 2114 g1h->print_size_transition(gclog_or_tty, 2115 start_used_bytes, 2116 g1h->used(), 2117 g1h->capacity()); 2118 } 2119 2120 // Clean up will have freed any regions completely full of garbage. 2121 // Update the soft reference policy with the new heap occupancy.
2122 Universe::update_heap_info_at_gc(); 2123 2124 // We need to make this be a "collection" so any collection pause that 2125 // races with it goes around and waits for completeCleanup to finish. 2126 g1h->increment_total_collections(); 2127 2128 // We reclaimed old regions so we should calculate the sizes to make 2129 // sure we update the old gen/space data. 2130 g1h->g1mm()->update_sizes(); 2131 2132 if (VerifyDuringGC) { 2133 HandleMark hm; // handle scope 2134 Universe::heap()->prepare_for_verify(); 2135 Universe::verify(VerifyOption_G1UsePrevMarking, 2136 " VerifyDuringGC:(after)"); 2137 } 2138 2139 g1h->verify_region_sets_optional(); 2140 g1h->trace_heap_after_concurrent_cycle(); 2141 } 2142 2143 void ConcurrentMark::completeCleanup() { 2144 if (has_aborted()) return; 2145 2146 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2147 2148 _cleanup_list.verify_optional(); 2149 FreeRegionList tmp_free_list("Tmp Free List"); 2150 2151 if (G1ConcRegionFreeingVerbose) { 2152 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2153 "cleanup list has %u entries", 2154 _cleanup_list.length()); 2155 } 2156 2157 // No one else should be accessing the _cleanup_list at this point, 2158 // so it's not necessary to take any locks 2159 while (!_cleanup_list.is_empty()) { 2160 HeapRegion* hr = _cleanup_list.remove_head(); 2161 assert(hr != NULL, "the list was not empty"); 2162 hr->par_clear(); 2163 tmp_free_list.add_as_tail(hr); 2164 2165 // Instead of adding one region at a time to the secondary_free_list, 2166 // we accumulate them in the local list and move them a few at a 2167 // time. This also cuts down on the number of notify_all() calls 2168 // we do during this process. We'll also append the local list when 2169 // _cleanup_list is empty (which means we just removed the last 2170 // region from the _cleanup_list). 2171 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2172 _cleanup_list.is_empty()) { 2173 if (G1ConcRegionFreeingVerbose) { 2174 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2175 "appending %u entries to the secondary_free_list, " 2176 "cleanup list still has %u entries", 2177 tmp_free_list.length(), 2178 _cleanup_list.length()); 2179 } 2180 2181 { 2182 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2183 g1h->secondary_free_list_add_as_tail(&tmp_free_list); 2184 SecondaryFreeList_lock->notify_all(); 2185 } 2186 2187 if (G1StressConcRegionFreeing) { 2188 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2189 os::sleep(Thread::current(), (jlong) 1, false); 2190 } 2191 } 2192 } 2193 } 2194 assert(tmp_free_list.is_empty(), "post-condition"); 2195 } 2196 2197 // Supporting Object and Oop closures for reference discovery 2198 // and processing during marking 2199 2200 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2201 HeapWord* addr = (HeapWord*)obj; 2202 return addr != NULL && 2203 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2204 } 2205 2206 // 'Keep Alive' oop closure used by both serial and parallel reference processing. 2207 // Uses the CMTask associated with a worker thread (for serial reference 2208 // processing the CMTask for worker 0 is used) to preserve (mark) and 2209 // trace referent objects. 2210 // 2211 // Using the CMTask and embedded local queues avoids having the worker 2212 // threads operating on the global mark stack. This reduces the risk 2213 // of overflowing the stack - which we would rather avoid at this late 2214 // stage.
Also, using the tasks' local queues removes the potential 2215 // for the workers to interfere with each other, which could occur if 2216 // they were operating on the global stack. 2217 2218 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2219 ConcurrentMark* _cm; 2220 CMTask* _task; 2221 int _ref_counter_limit; 2222 int _ref_counter; 2223 bool _is_serial; 2224 public: 2225 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2226 _cm(cm), _task(task), _is_serial(is_serial), 2227 _ref_counter_limit(G1RefProcDrainInterval) { 2228 assert(_ref_counter_limit > 0, "sanity"); 2229 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2230 _ref_counter = _ref_counter_limit; 2231 } 2232 2233 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2234 virtual void do_oop( oop* p) { do_oop_work(p); } 2235 2236 template <class T> void do_oop_work(T* p) { 2237 if (!_cm->has_overflown()) { 2238 oop obj = oopDesc::load_decode_heap_oop(p); 2239 if (_cm->verbose_high()) { 2240 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2241 "*"PTR_FORMAT" = "PTR_FORMAT, 2242 _task->worker_id(), p, (void*) obj); 2243 } 2244 2245 _task->deal_with_reference(obj); 2246 _ref_counter--; 2247 2248 if (_ref_counter == 0) { 2249 // We have dealt with _ref_counter_limit references, pushing them 2250 // and objects reachable from them on to the local stack (and 2251 // possibly the global stack). Call CMTask::do_marking_step() to 2252 // process these entries. 2253 // 2254 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2255 // there's nothing more to do (i.e. we're done with the entries that 2256 // were pushed as a result of the CMTask::deal_with_reference() calls 2257 // above) or we overflow. 2258 // 2259 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2260 // flag while there may still be some work to do. (See the comment at 2261 // the beginning of CMTask::do_marking_step() for those conditions - 2262 // one of which is reaching the specified time target.) It is only 2263 // when CMTask::do_marking_step() returns without setting the 2264 // has_aborted() flag that the marking step has completed. 2265 do { 2266 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2267 _task->do_marking_step(mark_step_duration_ms, 2268 false /* do_termination */, 2269 _is_serial); 2270 } while (_task->has_aborted() && !_cm->has_overflown()); 2271 _ref_counter = _ref_counter_limit; 2272 } 2273 } else { 2274 if (_cm->verbose_high()) { 2275 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2276 } 2277 } 2278 } 2279 }; 2280 2281 // 'Drain' closure used by both serial and parallel reference processing. 2282 // Uses the CMTask associated with a given worker thread (for serial 2283 // reference processing the CMTask for worker 0 is used). Calls the 2284 // do_marking_step routine, with an unbelievably large timeout value, 2285 // to drain the marking data structures of the remaining entries 2286 // added by the 'keep alive' oop closure above.
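// (Passing such a large time target to do_marking_step() effectively
// disables its time-based abort, so the drain loop below normally
// terminates only once the local queue and global stack are empty, or
// when the global mark stack overflows.)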
2287 2288 class G1CMDrainMarkingStackClosure: public VoidClosure { 2289 ConcurrentMark* _cm; 2290 CMTask* _task; 2291 bool _is_serial; 2292 public: 2293 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2294 _cm(cm), _task(task), _is_serial(is_serial) { 2295 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2296 } 2297 2298 void do_void() { 2299 do { 2300 if (_cm->verbose_high()) { 2301 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s", 2302 _task->worker_id(), BOOL_TO_STR(_is_serial)); 2303 } 2304 2305 // We call CMTask::do_marking_step() to completely drain the local 2306 // and global marking stacks of entries pushed by the 'keep alive' 2307 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2308 // 2309 // CMTask::do_marking_step() is called in a loop, which we'll exit 2310 // if there's nothing more to do (i.e. we've completely drained the 2311 // entries that were pushed as a result of applying the 'keep alive' 2312 // closure to the entries on the discovered ref lists) or we overflow 2313 // the global marking stack. 2314 // 2315 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2316 // flag while there may still be some work to do. (See the comment at 2317 // the beginning of CMTask::do_marking_step() for those conditions - 2318 // one of which is reaching the specified time target.) It is only 2319 // when CMTask::do_marking_step() returns without setting the 2320 // has_aborted() flag that the marking step has completed. 2321 2322 _task->do_marking_step(1000000000.0 /* something very large */, 2323 true /* do_termination */, 2324 _is_serial); 2325 } while (_task->has_aborted() && !_cm->has_overflown()); 2326 } 2327 }; 2328 2329 // Implementation of AbstractRefProcTaskExecutor for parallel 2330 // reference processing at the end of G1 concurrent marking 2331 2332 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2333 private: 2334 G1CollectedHeap* _g1h; 2335 ConcurrentMark* _cm; 2336 WorkGang* _workers; 2337 int _active_workers; 2338 2339 public: 2340 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2341 ConcurrentMark* cm, 2342 WorkGang* workers, 2343 int n_workers) : 2344 _g1h(g1h), _cm(cm), 2345 _workers(workers), _active_workers(n_workers) { } 2346 2347 // Executes the given task using concurrent marking worker threads.
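// (Each override below wraps the given task in an AbstractGangTask proxy -
// G1CMRefProcTaskProxy for processing, G1CMRefEnqueueTaskProxy for
// enqueueing - and runs it on the work gang; see the proxy definitions
// that follow.)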
2348 virtual void execute(ProcessTask& task); 2349 virtual void execute(EnqueueTask& task); 2350 }; 2351 2352 class G1CMRefProcTaskProxy: public AbstractGangTask { 2353 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2354 ProcessTask& _proc_task; 2355 G1CollectedHeap* _g1h; 2356 ConcurrentMark* _cm; 2357 2358 public: 2359 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2360 G1CollectedHeap* g1h, 2361 ConcurrentMark* cm) : 2362 AbstractGangTask("Process reference objects in parallel"), 2363 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2364 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2365 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2366 } 2367 2368 virtual void work(uint worker_id) { 2369 CMTask* task = _cm->task(worker_id); 2370 G1CMIsAliveClosure g1_is_alive(_g1h); 2371 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2372 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2373 2374 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2375 } 2376 }; 2377 2378 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2379 assert(_workers != NULL, "Need parallel worker threads."); 2380 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2381 2382 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2383 2384 // We need to reset the concurrency level before each 2385 // proxy task execution, so that the termination protocol 2386 // and overflow handling in CMTask::do_marking_step() know 2387 // how many workers to wait for. 2388 _cm->set_concurrency(_active_workers); 2389 _g1h->set_par_threads(_active_workers); 2390 _workers->run_task(&proc_task_proxy); 2391 _g1h->set_par_threads(0); 2392 } 2393 2394 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2395 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2396 EnqueueTask& _enq_task; 2397 2398 public: 2399 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2400 AbstractGangTask("Enqueue reference objects in parallel"), 2401 _enq_task(enq_task) { } 2402 2403 virtual void work(uint worker_id) { 2404 _enq_task.work(worker_id); 2405 } 2406 }; 2407 2408 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2409 assert(_workers != NULL, "Need parallel worker threads."); 2410 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2411 2412 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2413 2414 // Not strictly necessary but... 2415 // 2416 // We need to reset the concurrency level before each 2417 // proxy task execution, so that the termination protocol 2418 // and overflow handling in CMTask::do_marking_step() know 2419 // how many workers to wait for. 2420 _cm->set_concurrency(_active_workers); 2421 _g1h->set_par_threads(_active_workers); 2422 _workers->run_task(&enq_task_proxy); 2423 _g1h->set_par_threads(0); 2424 } 2425 2426 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2427 if (has_overflown()) { 2428 // Skip processing the discovered references if we have 2429 // overflown the global marking stack. Reference objects 2430 // only get discovered once so it is OK not to 2431 // de-populate the discovered reference lists. We could have done so, 2432 // but the only benefit would be that, when marking restarts, 2433 // fewer reference objects are discovered. 2434 return; 2435 } 2436 2437 ResourceMark rm; 2438 HandleMark hm; 2439 2440 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2441 2442 // Is alive closure.
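// (An oop is judged alive here if it lies outside the G1 reserved heap,
// or if it is not "ill", i.e. not yet determined to be dead by the
// marking in progress; see G1CMIsAliveClosure::do_object_b() above.)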
2443 G1CMIsAliveClosure g1_is_alive(g1h); 2444 2445 // Inner scope to exclude the cleaning of the string and symbol 2446 // tables from the displayed time. 2447 { 2448 if (G1Log::finer()) { 2449 gclog_or_tty->put(' '); 2450 } 2451 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm()); 2452 2453 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2454 2455 // See the comment in G1CollectedHeap::ref_processing_init() 2456 // about how reference processing currently works in G1. 2457 2458 // Set the soft reference policy 2459 rp->setup_policy(clear_all_soft_refs); 2460 assert(_markStack.isEmpty(), "mark stack should be empty"); 2461 2462 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2463 // in serial reference processing. Note these closures are also 2464 // used for serially processing (by the current thread) the 2465 // JNI references during parallel reference processing. 2466 // 2467 // These closures do not need to synchronize with the worker 2468 // threads involved in parallel reference processing as these 2469 // instances are executed serially by the current thread (e.g. 2470 // reference processing is not multi-threaded and is thus 2471 // performed by the current thread instead of a gang worker). 2472 // 2473 // The gang tasks involved in parallel reference processing create 2474 // their own instances of these closures, which do their own 2475 // synchronization among themselves. 2476 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2477 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2478 2479 // We need at least one active thread. If reference processing 2480 // is not multi-threaded we use the current (VMThread) thread, 2481 // otherwise we use the work gang from the G1CollectedHeap and 2482 // we utilize all the worker threads we can. 2483 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL; 2484 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2485 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2486 2487 // Parallel processing task executor. 2488 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2489 g1h->workers(), active_workers); 2490 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2491 2492 // Set the concurrency level. The phase was already set prior to 2493 // executing the remark task. 2494 set_concurrency(active_workers); 2495 2496 // Set the degree of MT processing here. If the discovery was done MT, 2497 // the number of threads involved during discovery could differ from 2498 // the number of active workers. This is OK as long as the discovered 2499 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2500 rp->set_active_mt_degree(active_workers); 2501 2502 // Process the weak references. 2503 const ReferenceProcessorStats& stats = 2504 rp->process_discovered_references(&g1_is_alive, 2505 &g1_keep_alive, 2506 &g1_drain_mark_stack, 2507 executor, 2508 g1h->gc_timer_cm()); 2509 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2510 2511 // The do_oop work routines of the keep_alive and drain_marking_stack 2512 // oop closures will set the has_overflown flag if we overflow the 2513 // global marking stack.
2514 2515 assert(_markStack.overflow() || _markStack.isEmpty(), 2516 "mark stack should be empty (unless it overflowed)"); 2517 2518 if (_markStack.overflow()) { 2519 // This should have been done already when we tried to push an 2520 // entry on to the global mark stack. But let's do it again. 2521 set_has_overflown(); 2522 } 2523 2524 assert(rp->num_q() == active_workers, "why not"); 2525 2526 rp->enqueue_discovered_references(executor); 2527 2528 rp->verify_no_references_recorded(); 2529 assert(!rp->discovery_enabled(), "Post condition"); 2530 } 2531 2532 if (has_overflown()) { 2533 // We cannot trust g1_is_alive if the marking stack overflowed 2534 return; 2535 } 2536 2537 // Unlink stale oops in string deduplication queue/table 2538 StringDedup::unlink(&g1_is_alive); 2539 2540 g1h->unlink_string_and_symbol_table(&g1_is_alive, 2541 /* process_strings */ false, // currently strings are always roots 2542 /* process_symbols */ true); 2543 } 2544 2545 void ConcurrentMark::swapMarkBitMaps() { 2546 CMBitMapRO* temp = _prevMarkBitMap; 2547 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2548 _nextMarkBitMap = (CMBitMap*) temp; 2549 } 2550 2551 class CMRemarkTask: public AbstractGangTask { 2552 private: 2553 ConcurrentMark* _cm; 2554 bool _is_serial; 2555 public: 2556 void work(uint worker_id) { 2557 // Since all available tasks are actually started, we should 2558 // only proceed if we're supposed to be active. 2559 if (worker_id < _cm->active_tasks()) { 2560 CMTask* task = _cm->task(worker_id); 2561 task->record_start_time(); 2562 do { 2563 task->do_marking_step(1000000000.0 /* something very large */, 2564 true /* do_termination */, 2565 _is_serial); 2566 } while (task->has_aborted() && !_cm->has_overflown()); 2567 // If we overflow, then we do not want to restart. We instead 2568 // want to abort remark and do concurrent marking again. 2569 task->record_end_time(); 2570 } 2571 } 2572 2573 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) : 2574 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) { 2575 _cm->terminator()->reset_for_reuse(active_workers); 2576 } 2577 }; 2578 2579 void ConcurrentMark::checkpointRootsFinalWork() { 2580 ResourceMark rm; 2581 HandleMark hm; 2582 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2583 2584 g1h->ensure_parsability(false); 2585 2586 if (G1CollectedHeap::use_parallel_gc_threads()) { 2587 G1CollectedHeap::StrongRootsScope srs(g1h); 2588 // this is remark, so we'll use up all active threads 2589 uint active_workers = g1h->workers()->active_workers(); 2590 if (active_workers == 0) { 2591 assert(active_workers > 0, "Should have been set earlier"); 2592 active_workers = (uint) ParallelGCThreads; 2593 g1h->workers()->set_active_workers(active_workers); 2594 } 2595 set_concurrency_and_phase(active_workers, false /* concurrent */); 2596 // Leave _parallel_marking_threads at its 2597 // value originally calculated in the ConcurrentMark 2598 // constructor and pass values of the active workers 2599 // through the gang in the task. 2600 2601 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */); 2602 // We will start all available threads, even if we decide that the 2603 // active_workers will be fewer. The extra ones will just bail out 2604 // immediately.
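// (The bail-out happens in CMRemarkTask::work() above: a worker whose
// worker_id is at or above _cm->active_tasks() returns without doing
// any marking.)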
2605 g1h->set_par_threads(active_workers); 2606 g1h->workers()->run_task(&remarkTask); 2607 g1h->set_par_threads(0); 2608 } else { 2609 G1CollectedHeap::StrongRootsScope srs(g1h); 2610 uint active_workers = 1; 2611 set_concurrency_and_phase(active_workers, false /* concurrent */); 2612 2613 // Note - if there's no work gang then the VMThread will be 2614 // the thread to execute the remark - serially. We have 2615 // to pass true for the is_serial parameter so that 2616 // CMTask::do_marking_step() doesn't enter the sync 2617 // barriers in the event of an overflow. Doing so will 2618 // cause an assert that the current thread is not a 2619 // concurrent GC thread. 2620 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2621 remarkTask.work(0); 2622 } 2623 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2624 guarantee(has_overflown() || 2625 satb_mq_set.completed_buffers_num() == 0, 2626 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2627 BOOL_TO_STR(has_overflown()), 2628 satb_mq_set.completed_buffers_num())); 2629 2630 print_stats(); 2631 } 2632 2633 #ifndef PRODUCT 2634 2635 class PrintReachableOopClosure: public OopClosure { 2636 private: 2637 G1CollectedHeap* _g1h; 2638 outputStream* _out; 2639 VerifyOption _vo; 2640 bool _all; 2641 2642 public: 2643 PrintReachableOopClosure(outputStream* out, 2644 VerifyOption vo, 2645 bool all) : 2646 _g1h(G1CollectedHeap::heap()), 2647 _out(out), _vo(vo), _all(all) { } 2648 2649 void do_oop(narrowOop* p) { do_oop_work(p); } 2650 void do_oop( oop* p) { do_oop_work(p); } 2651 2652 template <class T> void do_oop_work(T* p) { 2653 oop obj = oopDesc::load_decode_heap_oop(p); 2654 const char* str = NULL; 2655 const char* str2 = ""; 2656 2657 if (obj == NULL) { 2658 str = ""; 2659 } else if (!_g1h->is_in_g1_reserved(obj)) { 2660 str = " O"; 2661 } else { 2662 HeapRegion* hr = _g1h->heap_region_containing(obj); 2663 guarantee(hr != NULL, "invariant"); 2664 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2665 bool marked = _g1h->is_marked(obj, _vo); 2666 2667 if (over_tams) { 2668 str = " >"; 2669 if (marked) { 2670 str2 = " AND MARKED"; 2671 } 2672 } else if (marked) { 2673 str = " M"; 2674 } else { 2675 str = " NOT"; 2676 } 2677 } 2678 2679 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2680 p, (void*) obj, str, str2); 2681 } 2682 }; 2683 2684 class PrintReachableObjectClosure : public ObjectClosure { 2685 private: 2686 G1CollectedHeap* _g1h; 2687 outputStream* _out; 2688 VerifyOption _vo; 2689 bool _all; 2690 HeapRegion* _hr; 2691 2692 public: 2693 PrintReachableObjectClosure(outputStream* out, 2694 VerifyOption vo, 2695 bool all, 2696 HeapRegion* hr) : 2697 _g1h(G1CollectedHeap::heap()), 2698 _out(out), _vo(vo), _all(all), _hr(hr) { } 2699 2700 void do_object(oop o) { 2701 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2702 bool marked = _g1h->is_marked(o, _vo); 2703 bool print_it = _all || over_tams || marked; 2704 2705 if (print_it) { 2706 _out->print_cr(" "PTR_FORMAT"%s", 2707 (void *)o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2708 PrintReachableOopClosure oopCl(_out, _vo, _all); 2709 o->oop_iterate_no_header(&oopCl); 2710 } 2711 } 2712 }; 2713 2714 class PrintReachableRegionClosure : public HeapRegionClosure { 2715 private: 2716 G1CollectedHeap* _g1h; 2717 outputStream* _out; 2718 VerifyOption _vo; 2719 bool _all; 2720 2721 public: 2722 bool doHeapRegion(HeapRegion* hr) { 2723 HeapWord* b = hr->bottom(); 2724 HeapWord* e = hr->end(); 2725 HeapWord* t = hr->top(); 2726 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2727 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2728 "TAMS: "PTR_FORMAT, b, e, t, p); 2729 _out->cr(); 2730 2731 HeapWord* from = b; 2732 HeapWord* to = t; 2733 2734 if (to > from) { 2735 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2736 _out->cr(); 2737 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2738 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2739 _out->cr(); 2740 } 2741 2742 return false; 2743 } 2744 2745 PrintReachableRegionClosure(outputStream* out, 2746 VerifyOption vo, 2747 bool all) : 2748 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2749 }; 2750 2751 void ConcurrentMark::print_reachable(const char* str, 2752 VerifyOption vo, 2753 bool all) { 2754 gclog_or_tty->cr(); 2755 gclog_or_tty->print_cr("== Doing heap dump... "); 2756 2757 if (G1PrintReachableBaseFile == NULL) { 2758 gclog_or_tty->print_cr(" #### error: no base file defined"); 2759 return; 2760 } 2761 2762 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2763 (JVM_MAXPATHLEN - 1)) { 2764 gclog_or_tty->print_cr(" #### error: file name too long"); 2765 return; 2766 } 2767 2768 char file_name[JVM_MAXPATHLEN]; 2769 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2770 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2771 2772 fileStream fout(file_name); 2773 if (!fout.is_open()) { 2774 gclog_or_tty->print_cr(" #### error: could not open file"); 2775 return; 2776 } 2777 2778 outputStream* out = &fout; 2779 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2780 out->cr(); 2781 2782 out->print_cr("--- ITERATING OVER REGIONS"); 2783 out->cr(); 2784 PrintReachableRegionClosure rcl(out, vo, all); 2785 _g1h->heap_region_iterate(&rcl); 2786 out->cr(); 2787 2788 gclog_or_tty->print_cr(" done"); 2789 gclog_or_tty->flush(); 2790 } 2791 2792 #endif // PRODUCT 2793 2794 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2795 // Note we are overriding the read-only view of the prev map here, via 2796 // the cast. 2797 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2798 } 2799 2800 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2801 _nextMarkBitMap->clearRange(mr); 2802 } 2803 2804 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2805 clearRangePrevBitmap(mr); 2806 clearRangeNextBitmap(mr); 2807 } 2808 2809 HeapRegion* 2810 ConcurrentMark::claim_region(uint worker_id) { 2811 // "checkpoint" the finger 2812 HeapWord* finger = _finger; 2813 2814 // _heap_end will not change underneath our feet; it only changes at 2815 // yield points. 2816 while (finger < _heap_end) { 2817 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2818 2819 // Note on how this code handles humongous regions. In the 2820 // normal case the finger will reach the start of a "starts 2821 // humongous" (SH) region. 
Its end will either be the end of the 2822 // last "continues humongous" (CH) region in the sequence, or the 2823 // standard end of the SH region (if the SH is the only region in 2824 // the sequence). That way claim_region() will skip over the CH 2825 // regions. However, there is a subtle race between a CM thread 2826 // executing this method and a mutator thread doing a humongous 2827 // object allocation. The two are not mutually exclusive as the CM 2828 // thread does not need to hold the Heap_lock when it gets 2829 // here. So there is a chance that claim_region() will come across 2830 // a free region that's in the process of becoming a SH or a CH 2831 // region. In the former case, it will either 2832 // a) Miss the update to the region's end, in which case it will 2833 // visit every subsequent CH region, will find their bitmaps 2834 // empty, and do nothing, or 2835 // b) Observe the update of the region's end (in which case 2836 // it will skip the subsequent CH regions). 2837 // If it comes across a region that suddenly becomes CH, the 2838 // scenario will be similar to b). So, the race between 2839 // claim_region() and a humongous object allocation might force us 2840 // to do a bit of unnecessary work (due to some unnecessary bitmap 2841 // iterations) but it should not introduce any correctness issues. 2842 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2843 HeapWord* bottom = curr_region->bottom(); 2844 HeapWord* end = curr_region->end(); 2845 HeapWord* limit = curr_region->next_top_at_mark_start(); 2846 2847 if (verbose_low()) { 2848 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" " 2849 "["PTR_FORMAT", "PTR_FORMAT"), " 2850 "limit = "PTR_FORMAT, 2851 worker_id, curr_region, bottom, end, limit); 2852 } 2853 2854 // Is the gap between reading the finger and doing the CAS too long? 2855 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2856 if (res == finger) { 2857 // we succeeded 2858 2859 // notice that _finger == end cannot be guaranteed here since 2860 // someone else might have moved the finger even further 2861 assert(_finger >= end, "the finger should have moved forward"); 2862 2863 if (verbose_low()) { 2864 gclog_or_tty->print_cr("[%u] we were successful with region = " 2865 PTR_FORMAT, worker_id, curr_region); 2866 } 2867 2868 if (limit > bottom) { 2869 if (verbose_low()) { 2870 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, " 2871 "returning it ", worker_id, curr_region); 2872 } 2873 return curr_region; 2874 } else { 2875 assert(limit == bottom, 2876 "the region limit should be at bottom"); 2877 if (verbose_low()) { 2878 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, " 2879 "returning NULL", worker_id, curr_region); 2880 } 2881 // we return NULL and the caller should try calling 2882 // claim_region() again.
2883 return NULL; 2884 } 2885 } else { 2886 assert(_finger > finger, "the finger should have moved forward"); 2887 if (verbose_low()) { 2888 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2889 "global finger = "PTR_FORMAT", " 2890 "our finger = "PTR_FORMAT, 2891 worker_id, _finger, finger); 2892 } 2893 2894 // read it again 2895 finger = _finger; 2896 } 2897 } 2898 2899 return NULL; 2900 } 2901 2902 #ifndef PRODUCT 2903 enum VerifyNoCSetOopsPhase { 2904 VerifyNoCSetOopsStack, 2905 VerifyNoCSetOopsQueues, 2906 VerifyNoCSetOopsSATBCompleted, 2907 VerifyNoCSetOopsSATBThread 2908 }; 2909 2910 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2911 private: 2912 G1CollectedHeap* _g1h; 2913 VerifyNoCSetOopsPhase _phase; 2914 int _info; 2915 2916 const char* phase_str() { 2917 switch (_phase) { 2918 case VerifyNoCSetOopsStack: return "Stack"; 2919 case VerifyNoCSetOopsQueues: return "Queue"; 2920 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2921 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2922 default: ShouldNotReachHere(); 2923 } 2924 return NULL; 2925 } 2926 2927 void do_object_work(oop obj) { 2928 guarantee(!_g1h->obj_in_cs(obj), 2929 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2930 (void*) obj, phase_str(), _info)); 2931 } 2932 2933 public: 2934 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2935 2936 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2937 _phase = phase; 2938 _info = info; 2939 } 2940 2941 virtual void do_oop(oop* p) { 2942 oop obj = oopDesc::load_decode_heap_oop(p); 2943 do_object_work(obj); 2944 } 2945 2946 virtual void do_oop(narrowOop* p) { 2947 // We should not come across narrow oops while scanning marking 2948 // stacks and SATB buffers. 2949 ShouldNotReachHere(); 2950 } 2951 2952 virtual void do_object(oop obj) { 2953 do_object_work(obj); 2954 } 2955 }; 2956 2957 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2958 bool verify_enqueued_buffers, 2959 bool verify_thread_buffers, 2960 bool verify_fingers) { 2961 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2962 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2963 return; 2964 } 2965 2966 VerifyNoCSetOopsClosure cl; 2967 2968 if (verify_stacks) { 2969 // Verify entries on the global mark stack 2970 cl.set_phase(VerifyNoCSetOopsStack); 2971 _markStack.oops_do(&cl); 2972 2973 // Verify entries on the task queues 2974 for (uint i = 0; i < _max_worker_id; i += 1) { 2975 cl.set_phase(VerifyNoCSetOopsQueues, i); 2976 CMTaskQueue* queue = _task_queues->queue(i); 2977 queue->oops_do(&cl); 2978 } 2979 } 2980 2981 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2982 2983 // Verify entries on the enqueued SATB buffers 2984 if (verify_enqueued_buffers) { 2985 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2986 satb_qs.iterate_completed_buffers_read_only(&cl); 2987 } 2988 2989 // Verify entries on the per-thread SATB buffers 2990 if (verify_thread_buffers) { 2991 cl.set_phase(VerifyNoCSetOopsSATBThread); 2992 satb_qs.iterate_thread_buffers_read_only(&cl); 2993 } 2994 2995 if (verify_fingers) { 2996 // Verify the global finger 2997 HeapWord* global_finger = finger(); 2998 if (global_finger != NULL && global_finger < _heap_end) { 2999 // The global finger always points to a heap region boundary. 
We 3000 // use heap_region_containing_raw() to get the containing region 3001 // given that the global finger could be pointing to a free region 3002 // which subsequently becomes continues humongous. If that 3003 // happens, heap_region_containing() will return the bottom of the 3004 // corresponding starts humongous region and the check below will 3005 // not hold any more. 3006 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3007 guarantee(global_finger == global_hr->bottom(), 3008 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3009 global_finger, HR_FORMAT_PARAMS(global_hr))); 3010 } 3011 3012 // Verify the task fingers 3013 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3014 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3015 CMTask* task = _tasks[i]; 3016 HeapWord* task_finger = task->finger(); 3017 if (task_finger != NULL && task_finger < _heap_end) { 3018 // See above note on the global finger verification. 3019 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3020 guarantee(task_finger == task_hr->bottom() || 3021 !task_hr->in_collection_set(), 3022 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3023 task_finger, HR_FORMAT_PARAMS(task_hr))); 3024 } 3025 } 3026 } 3027 } 3028 #endif // PRODUCT 3029 3030 // Aggregate the counting data that was constructed concurrently 3031 // with marking. 3032 class AggregateCountDataHRClosure: public HeapRegionClosure { 3033 G1CollectedHeap* _g1h; 3034 ConcurrentMark* _cm; 3035 CardTableModRefBS* _ct_bs; 3036 BitMap* _cm_card_bm; 3037 uint _max_worker_id; 3038 3039 public: 3040 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3041 BitMap* cm_card_bm, 3042 uint max_worker_id) : 3043 _g1h(g1h), _cm(g1h->concurrent_mark()), 3044 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3045 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3046 3047 bool doHeapRegion(HeapRegion* hr) { 3048 if (hr->continuesHumongous()) { 3049 // We will ignore these here and process them when their 3050 // associated "starts humongous" region is processed. 3051 // Note that we cannot rely on their associated 3052 // "starts humongous" region to have their bit set to 1 3053 // since, due to the region chunking in the parallel region 3054 // iteration, a "continues humongous" region might be visited 3055 // before its associated "starts humongous". 3056 return false; 3057 } 3058 3059 HeapWord* start = hr->bottom(); 3060 HeapWord* limit = hr->next_top_at_mark_start(); 3061 HeapWord* end = hr->end(); 3062 3063 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3064 err_msg("Preconditions not met - " 3065 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3066 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3067 start, limit, hr->top(), hr->end())); 3068 3069 assert(hr->next_marked_bytes() == 0, "Precondition"); 3070 3071 if (start == limit) { 3072 // NTAMS of this region has not been set so nothing to do. 3073 return false; 3074 } 3075 3076 // 'start' should be in the heap. 
3077 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 3078 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 3079 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 3080 3081 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 3082 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 3083 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 3084 3085 // If ntams is not card aligned then we bump the card bitmap index 3086 // for limit so that we get all the cards spanned by 3087 // the object ending at ntams. 3088 // Note: if this is the last region in the heap then ntams 3089 // could actually be just beyond the end of the heap; 3090 // limit_idx will then correspond to a (non-existent) card 3091 // that is also outside the heap. 3092 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 3093 limit_idx += 1; 3094 } 3095 3096 assert(limit_idx <= end_idx, "or else use atomics"); 3097 3098 // Aggregate the "stripe" in the count data associated with hr. 3099 uint hrs_index = hr->hrs_index(); 3100 size_t marked_bytes = 0; 3101 3102 for (uint i = 0; i < _max_worker_id; i += 1) { 3103 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 3104 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 3105 3106 // Fetch the marked_bytes in this region for task i and 3107 // add it to the running total for this region. 3108 marked_bytes += marked_bytes_array[hrs_index]; 3109 3110 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 3111 // into the global card bitmap. 3112 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3113 3114 while (scan_idx < limit_idx) { 3115 assert(task_card_bm->at(scan_idx) == true, "should be"); 3116 _cm_card_bm->set_bit(scan_idx); 3117 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3118 3119 // BitMap::get_next_one_offset() can handle the case when 3120 // its left_offset parameter is greater than its right_offset 3121 // parameter. It does, however, have an early exit if 3122 // left_offset == right_offset. So let's limit the value 3123 // passed in for left offset here. 3124 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 3125 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 3126 } 3127 } 3128 3129 // Update the marked bytes for this region.
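// (At this point marked_bytes is the sum of every worker's count for hr;
// the per-worker counting structures themselves are reset separately, by
// clear_all_count_data(), defined further below.)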
3130 hr->add_to_marked_bytes(marked_bytes); 3131 3132 // Next heap region 3133 return false; 3134 } 3135 }; 3136 3137 class G1AggregateCountDataTask: public AbstractGangTask { 3138 protected: 3139 G1CollectedHeap* _g1h; 3140 ConcurrentMark* _cm; 3141 BitMap* _cm_card_bm; 3142 uint _max_worker_id; 3143 int _active_workers; 3144 3145 public: 3146 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3147 ConcurrentMark* cm, 3148 BitMap* cm_card_bm, 3149 uint max_worker_id, 3150 int n_workers) : 3151 AbstractGangTask("Count Aggregation"), 3152 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3153 _max_worker_id(max_worker_id), 3154 _active_workers(n_workers) { } 3155 3156 void work(uint worker_id) { 3157 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3158 3159 if (G1CollectedHeap::use_parallel_gc_threads()) { 3160 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3161 _active_workers, 3162 HeapRegion::AggregateCountClaimValue); 3163 } else { 3164 _g1h->heap_region_iterate(&cl); 3165 } 3166 } 3167 }; 3168 3169 3170 void ConcurrentMark::aggregate_count_data() { 3171 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3172 _g1h->workers()->active_workers() : 3173 1); 3174 3175 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3176 _max_worker_id, n_workers); 3177 3178 if (G1CollectedHeap::use_parallel_gc_threads()) { 3179 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3180 "sanity check"); 3181 _g1h->set_par_threads(n_workers); 3182 _g1h->workers()->run_task(&g1_par_agg_task); 3183 _g1h->set_par_threads(0); 3184 3185 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3186 "sanity check"); 3187 _g1h->reset_heap_region_claim_values(); 3188 } else { 3189 g1_par_agg_task.work(0); 3190 } 3191 } 3192 3193 // Clear the per-worker arrays used to store the per-region counting data 3194 void ConcurrentMark::clear_all_count_data() { 3195 // Clear the global card bitmap - it will be filled during 3196 // liveness count aggregation (during remark) and the 3197 // final counting task. 3198 _card_bm.clear(); 3199 3200 // Clear the global region bitmap - it will be filled as part 3201 // of the final counting task. 
3202 _region_bm.clear(); 3203 3204 uint max_regions = _g1h->max_regions(); 3205 assert(_max_worker_id > 0, "uninitialized"); 3206 3207 for (uint i = 0; i < _max_worker_id; i += 1) { 3208 BitMap* task_card_bm = count_card_bitmap_for(i); 3209 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3210 3211 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3212 assert(marked_bytes_array != NULL, "uninitialized"); 3213 3214 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3215 task_card_bm->clear(); 3216 } 3217 } 3218 3219 void ConcurrentMark::print_stats() { 3220 if (verbose_stats()) { 3221 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3222 for (size_t i = 0; i < _active_tasks; ++i) { 3223 _tasks[i]->print_stats(); 3224 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3225 } 3226 } 3227 } 3228 3229 // abandon current marking iteration due to a Full GC 3230 void ConcurrentMark::abort() { 3231 // Clear all marks to force marking thread to do nothing 3232 _nextMarkBitMap->clearAll(); 3233 // Clear the liveness counting data 3234 clear_all_count_data(); 3235 // Empty mark stack 3236 reset_marking_state(); 3237 for (uint i = 0; i < _max_worker_id; ++i) { 3238 _tasks[i]->clear_region_fields(); 3239 } 3240 _has_aborted = true; 3241 3242 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3243 satb_mq_set.abandon_partial_marking(); 3244 // This can be called either during or outside marking, we'll read 3245 // the expected_active value from the SATB queue set. 3246 satb_mq_set.set_active_all_threads( 3247 false, /* new active value */ 3248 satb_mq_set.is_active() /* expected_active */); 3249 3250 _g1h->trace_heap_after_concurrent_cycle(); 3251 _g1h->register_concurrent_cycle_end(); 3252 } 3253 3254 static void print_ms_time_info(const char* prefix, const char* name, 3255 NumberSeq& ns) { 3256 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3257 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3258 if (ns.num() > 0) { 3259 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3260 prefix, ns.sd(), ns.maximum()); 3261 } 3262 } 3263 3264 void ConcurrentMark::print_summary_info() { 3265 gclog_or_tty->print_cr(" Concurrent marking:"); 3266 print_ms_time_info(" ", "init marks", _init_times); 3267 print_ms_time_info(" ", "remarks", _remark_times); 3268 { 3269 print_ms_time_info(" ", "final marks", _remark_mark_times); 3270 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3271 3272 } 3273 print_ms_time_info(" ", "cleanups", _cleanup_times); 3274 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3275 _total_counting_time, 3276 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3277 (double)_cleanup_times.num() 3278 : 0.0)); 3279 if (G1ScrubRemSets) { 3280 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3281 _total_rs_scrub_time, 3282 (_cleanup_times.num() > 0 ? 
_total_rs_scrub_time * 1000.0 / 3283 (double)_cleanup_times.num() 3284 : 0.0)); 3285 } 3286 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3287 (_init_times.sum() + _remark_times.sum() + 3288 _cleanup_times.sum())/1000.0); 3289 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3290 "(%8.2f s marking).", 3291 cmThread()->vtime_accum(), 3292 cmThread()->vtime_mark_accum()); 3293 } 3294 3295 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3296 if (use_parallel_marking_threads()) { 3297 _parallel_workers->print_worker_threads_on(st); 3298 } 3299 } 3300 3301 void ConcurrentMark::print_on_error(outputStream* st) const { 3302 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3303 _prevMarkBitMap, _nextMarkBitMap); 3304 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3305 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3306 } 3307 3308 // We take a break if someone is trying to stop the world. 3309 bool ConcurrentMark::do_yield_check(uint worker_id) { 3310 if (should_yield()) { 3311 if (worker_id == 0) { 3312 _g1h->g1_policy()->record_concurrent_pause(); 3313 } 3314 cmThread()->yield(); 3315 return true; 3316 } else { 3317 return false; 3318 } 3319 } 3320 3321 bool ConcurrentMark::should_yield() { 3322 return cmThread()->should_yield(); 3323 } 3324 3325 bool ConcurrentMark::containing_card_is_marked(void* p) { 3326 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3327 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3328 } 3329 3330 bool ConcurrentMark::containing_cards_are_marked(void* start, 3331 void* last) { 3332 return containing_card_is_marked(start) && 3333 containing_card_is_marked(last); 3334 } 3335 3336 #ifndef PRODUCT 3337 // for debugging purposes 3338 void ConcurrentMark::print_finger() { 3339 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3340 _heap_start, _heap_end, _finger); 3341 for (uint i = 0; i < _max_worker_id; ++i) { 3342 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger()); 3343 } 3344 gclog_or_tty->print_cr(""); 3345 } 3346 #endif 3347 3348 void CMTask::scan_object(oop obj) { 3349 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3350 3351 if (_cm->verbose_high()) { 3352 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3353 _worker_id, (void*) obj); 3354 } 3355 3356 size_t obj_size = obj->size(); 3357 _words_scanned += obj_size; 3358 3359 obj->oop_iterate(_cm_oop_closure); 3360 statsOnly( ++_objs_scanned ); 3361 check_limits(); 3362 } 3363 3364 // Closure for iteration over bitmaps 3365 class CMBitMapClosure : public BitMapClosure { 3366 private: 3367 // the bitmap that is being iterated over 3368 CMBitMap* _nextMarkBitMap; 3369 ConcurrentMark* _cm; 3370 CMTask* _task; 3371 3372 public: 3373 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3374 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3375 3376 bool do_bit(size_t offset) { 3377 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3378 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3379 assert( addr < _cm->finger(), "invariant"); 3380 3381 statsOnly( _task->increase_objs_found_on_bitmap() ); 3382 assert(addr >= _task->finger(), "invariant"); 3383 3384 // We move that task's local finger along. 
3385 _task->move_finger_to(addr);
3386
3387 _task->scan_object(oop(addr));
3388 // we only partially drain the local queue and global stack
3389 _task->drain_local_queue(true);
3390 _task->drain_global_stack(true);
3391
3392 // if the has_aborted flag has been raised, we need to bail out of
3393 // the iteration
3394 return !_task->has_aborted();
3395 }
3396 };
3397
3398 // Closure for iterating over objects, currently only used for
3399 // processing SATB buffers.
3400 class CMObjectClosure : public ObjectClosure {
3401 private:
3402 CMTask* _task;
3403
3404 public:
3405 void do_object(oop obj) {
3406 _task->deal_with_reference(obj);
3407 }
3408
3409 CMObjectClosure(CMTask* task) : _task(task) { }
3410 };
3411
3412 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3413 ConcurrentMark* cm,
3414 CMTask* task)
3415 : _g1h(g1h), _cm(cm), _task(task) {
3416 assert(_ref_processor == NULL, "should be initialized to NULL");
3417
3418 if (G1UseConcMarkReferenceProcessing) {
3419 _ref_processor = g1h->ref_processor_cm();
3420 assert(_ref_processor != NULL, "should not be NULL");
3421 }
3422 }
3423
3424 void CMTask::setup_for_region(HeapRegion* hr) {
3425 // Separated the asserts so that we know which one fires.
3426 assert(hr != NULL,
3427 "claim_region() should have filtered out NULL regions");
3428 assert(!hr->continuesHumongous(),
3429 "claim_region() should have filtered out continues humongous regions");
3430
3431 if (_cm->verbose_low()) {
3432 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3433 _worker_id, hr);
3434 }
3435
3436 _curr_region = hr;
3437 _finger = hr->bottom();
3438 update_region_limit();
3439 }
3440
3441 void CMTask::update_region_limit() {
3442 HeapRegion* hr = _curr_region;
3443 HeapWord* bottom = hr->bottom();
3444 HeapWord* limit = hr->next_top_at_mark_start();
3445
3446 if (limit == bottom) {
3447 if (_cm->verbose_low()) {
3448 gclog_or_tty->print_cr("[%u] found an empty region "
3449 "["PTR_FORMAT", "PTR_FORMAT")",
3450 _worker_id, bottom, limit);
3451 }
3452 // The region was collected underneath our feet.
3453 // We set the finger to bottom to ensure that the bitmap
3454 // iteration that will follow this will not do anything.
3455 // (this is not a condition that holds when we set the region up,
3456 // as the region is not supposed to be empty in the first place)
3457 _finger = bottom;
3458 } else if (limit >= _region_limit) {
3459 assert(limit >= _finger, "peace of mind");
3460 } else {
3461 assert(limit < _region_limit, "only way to get here");
3462 // This can happen under some pretty unusual circumstances. An
3463 // evacuation pause empties the region underneath our feet (NTAMS
3464 // at bottom). We then do some allocation in the region (NTAMS
3465 // stays at bottom), followed by the region being used as a GC
3466 // alloc region (NTAMS will move to top() and the objects
3467 // originally below it will be grayed). All objects now marked in
3468 // the region are explicitly grayed if they are below the global
3469 // finger, so in fact we do not need to scan anything else. We
3470 // simply set _finger to limit to ensure that the bitmap
3471 // iteration doesn't do anything.
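// [Illustrative aside] The net effect of the three cases above can be
// summarized as a small pure function (hypothetical names, word indices
// standing in for HeapWord*; a sketch under those assumptions, not
// HotSpot code): the finger only ever moves so that the upcoming bitmap
// iteration over [finger, limit) does no work when the region was
// emptied or refilled underneath us.
#if 0
#include <cstddef>

typedef size_t HeapWordModel; // word index stand-in for HeapWord*

// Returns the finger the task should resume from, given the region's
// bottom, the freshly re-read NTAMS (new_limit) and the limit we had
// cached previously (old_limit).
HeapWordModel resume_finger(HeapWordModel bottom,
                            HeapWordModel new_limit,
                            HeapWordModel old_limit,
                            HeapWordModel finger) {
  if (new_limit == bottom) {
    return bottom;     // region collected under our feet: scan nothing
  } else if (new_limit >= old_limit) {
    return finger;     // nothing shrank: resume from where we left off
  } else {
    return new_limit;  // region refilled as a GC alloc region: newly
                       // marked objects are explicitly grayed, scan nothing
  }
}
#endif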
3472 _finger = limit; 3473 } 3474 3475 _region_limit = limit; 3476 } 3477 3478 void CMTask::giveup_current_region() { 3479 assert(_curr_region != NULL, "invariant"); 3480 if (_cm->verbose_low()) { 3481 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3482 _worker_id, _curr_region); 3483 } 3484 clear_region_fields(); 3485 } 3486 3487 void CMTask::clear_region_fields() { 3488 // Values for these three fields that indicate that we're not 3489 // holding on to a region. 3490 _curr_region = NULL; 3491 _finger = NULL; 3492 _region_limit = NULL; 3493 } 3494 3495 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3496 if (cm_oop_closure == NULL) { 3497 assert(_cm_oop_closure != NULL, "invariant"); 3498 } else { 3499 assert(_cm_oop_closure == NULL, "invariant"); 3500 } 3501 _cm_oop_closure = cm_oop_closure; 3502 } 3503 3504 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3505 guarantee(nextMarkBitMap != NULL, "invariant"); 3506 3507 if (_cm->verbose_low()) { 3508 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3509 } 3510 3511 _nextMarkBitMap = nextMarkBitMap; 3512 clear_region_fields(); 3513 3514 _calls = 0; 3515 _elapsed_time_ms = 0.0; 3516 _termination_time_ms = 0.0; 3517 _termination_start_time_ms = 0.0; 3518 3519 #if _MARKING_STATS_ 3520 _local_pushes = 0; 3521 _local_pops = 0; 3522 _local_max_size = 0; 3523 _objs_scanned = 0; 3524 _global_pushes = 0; 3525 _global_pops = 0; 3526 _global_max_size = 0; 3527 _global_transfers_to = 0; 3528 _global_transfers_from = 0; 3529 _regions_claimed = 0; 3530 _objs_found_on_bitmap = 0; 3531 _satb_buffers_processed = 0; 3532 _steal_attempts = 0; 3533 _steals = 0; 3534 _aborted = 0; 3535 _aborted_overflow = 0; 3536 _aborted_cm_aborted = 0; 3537 _aborted_yield = 0; 3538 _aborted_timed_out = 0; 3539 _aborted_satb = 0; 3540 _aborted_termination = 0; 3541 #endif // _MARKING_STATS_ 3542 } 3543 3544 bool CMTask::should_exit_termination() { 3545 regular_clock_call(); 3546 // This is called when we are in the termination protocol. We should 3547 // quit if, for some reason, this task wants to abort or the global 3548 // stack is not empty (this means that we can get work from it). 3549 return !_cm->mark_stack_empty() || has_aborted(); 3550 } 3551 3552 void CMTask::reached_limit() { 3553 assert(_words_scanned >= _words_scanned_limit || 3554 _refs_reached >= _refs_reached_limit , 3555 "shouldn't have been called otherwise"); 3556 regular_clock_call(); 3557 } 3558 3559 void CMTask::regular_clock_call() { 3560 if (has_aborted()) return; 3561 3562 // First, we need to recalculate the words scanned and refs reached 3563 // limits for the next clock call. 3564 recalculate_limits(); 3565 3566 // During the regular clock call we do the following 3567 3568 // (1) If an overflow has been flagged, then we abort. 3569 if (_cm->has_overflown()) { 3570 set_has_aborted(); 3571 return; 3572 } 3573 3574 // If we are not concurrent (i.e. we're doing remark) we don't need 3575 // to check anything else. The other steps are only needed during 3576 // the concurrent marking phase. 3577 if (!concurrent()) return; 3578 3579 // (2) If marking has been aborted for Full GC, then we also abort. 3580 if (_cm->has_aborted()) { 3581 set_has_aborted(); 3582 statsOnly( ++_aborted_cm_aborted ); 3583 return; 3584 } 3585 3586 double curr_time_ms = os::elapsedVTime() * 1000.0; 3587 3588 // (3) If marking stats are enabled, then we update the step history. 
3589 #if _MARKING_STATS_
3590 if (_words_scanned >= _words_scanned_limit) {
3591 ++_clock_due_to_scanning;
3592 }
3593 if (_refs_reached >= _refs_reached_limit) {
3594 ++_clock_due_to_marking;
3595 }
3596
3597 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3598 _interval_start_time_ms = curr_time_ms;
3599 _all_clock_intervals_ms.add(last_interval_ms);
3600
3601 if (_cm->verbose_medium()) {
3602 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3603 "scanned = %d%s, refs reached = %d%s",
3604 _worker_id, last_interval_ms,
3605 _words_scanned,
3606 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3607 _refs_reached,
3608 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3609 }
3610 #endif // _MARKING_STATS_
3611
3612 // (4) We check whether we should yield. If we have to, then we abort.
3613 if (_cm->should_yield()) {
3614 // We should yield. To do this we abort the task. The caller is
3615 // responsible for yielding.
3616 set_has_aborted();
3617 statsOnly( ++_aborted_yield );
3618 return;
3619 }
3620
3621 // (5) We check whether we've reached our time quota. If we have,
3622 // then we abort.
3623 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3624 if (elapsed_time_ms > _time_target_ms) {
3625 set_has_aborted();
3626 _has_timed_out = true;
3627 statsOnly( ++_aborted_timed_out );
3628 return;
3629 }
3630
3631 // (6) Finally, we check whether there are enough completed SATB
3632 // buffers available for processing. If there are, we abort.
3633 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3634 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3635 if (_cm->verbose_low()) {
3636 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3637 _worker_id);
3638 }
3639 // we do need to process SATB buffers, we'll abort and restart
3640 // the marking task to do so
3641 set_has_aborted();
3642 statsOnly( ++_aborted_satb );
3643 return;
3644 }
3645 }
3646
3647 void CMTask::recalculate_limits() {
3648 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3649 _words_scanned_limit = _real_words_scanned_limit;
3650
3651 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3652 _refs_reached_limit = _real_refs_reached_limit;
3653 }
3654
3655 void CMTask::decrease_limits() {
3656 // This is called when we believe that we're going to do an infrequent
3657 // operation which will increase the cost per byte scanned (i.e. move
3658 // entries to/from the global stack). It basically tries to decrease the
3659 // scanning limit so that the clock is called earlier.
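// [Illustrative aside] Concretely, if the scanning period were, say,
// 12288 words (a made-up value; the real words_scanned_period is defined
// elsewhere), then after decrease_limits() the next clock call fires
// after only a quarter of a period's worth of scanning. A minimal,
// self-contained sketch of the arithmetic (not HotSpot code):
#if 0
#include <cassert>
#include <cstddef>

int main() {
  const size_t words_scanned_period = 12288;   // hypothetical value
  size_t words_scanned = 100000;               // words scanned so far

  // recalculate_limits(): the clock is normally due one full period out.
  size_t real_limit = words_scanned + words_scanned_period;
  size_t limit      = real_limit;

  // decrease_limits(): pull the limit back by 3/4 of a period, so the
  // clock fires after only ~1/4 of the usual amount of scanning.
  limit = real_limit - 3 * words_scanned_period / 4;

  assert(limit - words_scanned == words_scanned_period / 4);
  return 0;
}
#endif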
3660 3661 if (_cm->verbose_medium()) { 3662 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id); 3663 } 3664 3665 _words_scanned_limit = _real_words_scanned_limit - 3666 3 * words_scanned_period / 4; 3667 _refs_reached_limit = _real_refs_reached_limit - 3668 3 * refs_reached_period / 4; 3669 } 3670 3671 void CMTask::move_entries_to_global_stack() { 3672 // local array where we'll store the entries that will be popped 3673 // from the local queue 3674 oop buffer[global_stack_transfer_size]; 3675 3676 int n = 0; 3677 oop obj; 3678 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3679 buffer[n] = obj; 3680 ++n; 3681 } 3682 3683 if (n > 0) { 3684 // we popped at least one entry from the local queue 3685 3686 statsOnly( ++_global_transfers_to; _local_pops += n ); 3687 3688 if (!_cm->mark_stack_push(buffer, n)) { 3689 if (_cm->verbose_low()) { 3690 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow", 3691 _worker_id); 3692 } 3693 set_has_aborted(); 3694 } else { 3695 // the transfer was successful 3696 3697 if (_cm->verbose_medium()) { 3698 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack", 3699 _worker_id, n); 3700 } 3701 statsOnly( int tmp_size = _cm->mark_stack_size(); 3702 if (tmp_size > _global_max_size) { 3703 _global_max_size = tmp_size; 3704 } 3705 _global_pushes += n ); 3706 } 3707 } 3708 3709 // this operation was quite expensive, so decrease the limits 3710 decrease_limits(); 3711 } 3712 3713 void CMTask::get_entries_from_global_stack() { 3714 // local array where we'll store the entries that will be popped 3715 // from the global stack. 3716 oop buffer[global_stack_transfer_size]; 3717 int n; 3718 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3719 assert(n <= global_stack_transfer_size, 3720 "we should not pop more than the given limit"); 3721 if (n > 0) { 3722 // yes, we did actually pop at least one entry 3723 3724 statsOnly( ++_global_transfers_from; _global_pops += n ); 3725 if (_cm->verbose_medium()) { 3726 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack", 3727 _worker_id, n); 3728 } 3729 for (int i = 0; i < n; ++i) { 3730 bool success = _task_queue->push(buffer[i]); 3731 // We only call this when the local queue is empty or under a 3732 // given target limit. So, we do not expect this push to fail. 3733 assert(success, "invariant"); 3734 } 3735 3736 statsOnly( int tmp_size = _task_queue->size(); 3737 if (tmp_size > _local_max_size) { 3738 _local_max_size = tmp_size; 3739 } 3740 _local_pushes += n ); 3741 } 3742 3743 // this operation was quite expensive, so decrease the limits 3744 decrease_limits(); 3745 } 3746 3747 void CMTask::drain_local_queue(bool partially) { 3748 if (has_aborted()) return; 3749 3750 // Decide what the target size is, depending whether we're going to 3751 // drain it partially (so that other tasks can steal if they run out 3752 // of things to do) or totally (at the very end). 
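// [Illustrative aside] The partial/total drain policy can be modelled
// with a plain deque (hypothetical names and parameters, a sketch and
// not HotSpot code): a partial drain stops once the queue is back under
// a small target so other workers still find entries to steal, while a
// total drain empties it completely.
#if 0
#include <algorithm>
#include <cstddef>
#include <deque>

void drain_local_queue_model(std::deque<int>& queue,
                             bool partially,
                             size_t max_elems,
                             size_t drain_target /* models GCDrainStackTargetSize */) {
  // Mirrors the target_size selection in the real code below.
  size_t target_size = partially ? std::min(max_elems / 3, drain_target)
                                 : (size_t)0;
  while (queue.size() > target_size) {
    int entry = queue.front();  // models pop_local()
    queue.pop_front();
    (void)entry;                // a real task would scan_object(entry) here
  }
}
#endif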
3753 size_t target_size;
3754 if (partially) {
3755 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3756 } else {
3757 target_size = 0;
3758 }
3759
3760 if (_task_queue->size() > target_size) {
3761 if (_cm->verbose_high()) {
3762 gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3763 _worker_id, target_size);
3764 }
3765
3766 oop obj;
3767 bool ret = _task_queue->pop_local(obj);
3768 while (ret) {
3769 statsOnly( ++_local_pops );
3770
3771 if (_cm->verbose_high()) {
3772 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3773 (void*) obj);
3774 }
3775
3776 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3777 assert(!_g1h->is_on_master_free_list(
3778 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3779
3780 scan_object(obj);
3781
3782 if (_task_queue->size() <= target_size || has_aborted()) {
3783 ret = false;
3784 } else {
3785 ret = _task_queue->pop_local(obj);
3786 }
3787 }
3788
3789 if (_cm->verbose_high()) {
3790 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3791 _worker_id, _task_queue->size());
3792 }
3793 }
3794 }
3795
3796 void CMTask::drain_global_stack(bool partially) {
3797 if (has_aborted()) return;
3798
3799 // We have a policy to drain the local queue before we attempt to
3800 // drain the global stack.
3801 assert(partially || _task_queue->size() == 0, "invariant");
3802
3803 // Decide what the target size is, depending whether we're going to
3804 // drain it partially (so that other tasks can steal if they run out
3805 // of things to do) or totally (at the very end). Notice that,
3806 // because we move entries from the global stack in chunks or
3807 // because another task might be doing the same, we might in fact
3808 // drop below the target. But, this is not a problem.
3809 size_t target_size;
3810 if (partially) {
3811 target_size = _cm->partial_mark_stack_size_target();
3812 } else {
3813 target_size = 0;
3814 }
3815
3816 if (_cm->mark_stack_size() > target_size) {
3817 if (_cm->verbose_low()) {
3818 gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3819 _worker_id, target_size);
3820 }
3821
3822 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3823 get_entries_from_global_stack();
3824 drain_local_queue(partially);
3825 }
3826
3827 if (_cm->verbose_low()) {
3828 gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3829 _worker_id, _cm->mark_stack_size());
3830 }
3831 }
3832 }
3833
3834 // SATB Queue has several assumptions about whether to call the par or
3835 // non-par versions of the methods. This is why some of the code is
3836 // replicated. We should really get rid of the single-threaded version
3837 // of the code to simplify things.
3838 void CMTask::drain_satb_buffers() {
3839 if (has_aborted()) return;
3840
3841 // We set this so that the regular clock knows that we're in the
3842 // middle of draining buffers and doesn't set the abort flag when it
3843 // notices that SATB buffers are available for draining. It'd be
3844 // very counterproductive if it did that. :-)
3845 _draining_satb_buffers = true;
3846
3847 CMObjectClosure oc(this);
3848 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3849 if (G1CollectedHeap::use_parallel_gc_threads()) {
3850 satb_mq_set.set_par_closure(_worker_id, &oc);
3851 } else {
3852 satb_mq_set.set_closure(&oc);
3853 }
3854
3855 // This keeps claiming and applying the closure to completed buffers
3856 // until we run out of buffers or we need to abort.
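// [Illustrative aside] The claim-and-apply loop that follows has a
// common shape: repeatedly claim one completed buffer, process it, and
// let the regular clock decide whether to abort. A simplified,
// stand-alone model (hypothetical names, a sketch and not HotSpot code):
#if 0
#include <cstddef>
#include <queue>
#include <vector>

typedef std::vector<void*> SatbBufferModel;

// Models par_apply_closure_to_completed_buffer(): claim one buffer if
// any exist, apply the closure to each entry, and report whether a
// buffer was processed.
bool claim_and_apply(std::queue<SatbBufferModel>& completed,
                     void (*closure)(void*)) {
  if (completed.empty()) return false;
  SatbBufferModel buf = completed.front();
  completed.pop();
  for (size_t i = 0; i < buf.size(); ++i) {
    closure(buf[i]);  // models CMObjectClosure::do_object()
  }
  return true;
}

void drain_model(std::queue<SatbBufferModel>& completed,
                 void (*closure)(void*),
                 volatile bool& has_aborted) {
  while (!has_aborted && claim_and_apply(completed, closure)) {
    // a real task would call regular_clock_call() here, which may set
    // has_aborted if there is a reason to bail out
  }
}
#endif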
3857 if (G1CollectedHeap::use_parallel_gc_threads()) { 3858 while (!has_aborted() && 3859 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3860 if (_cm->verbose_medium()) { 3861 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3862 } 3863 statsOnly( ++_satb_buffers_processed ); 3864 regular_clock_call(); 3865 } 3866 } else { 3867 while (!has_aborted() && 3868 satb_mq_set.apply_closure_to_completed_buffer()) { 3869 if (_cm->verbose_medium()) { 3870 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3871 } 3872 statsOnly( ++_satb_buffers_processed ); 3873 regular_clock_call(); 3874 } 3875 } 3876 3877 if (!concurrent() && !has_aborted()) { 3878 // We should only do this during remark. 3879 if (G1CollectedHeap::use_parallel_gc_threads()) { 3880 satb_mq_set.par_iterate_closure_all_threads(_worker_id); 3881 } else { 3882 satb_mq_set.iterate_closure_all_threads(); 3883 } 3884 } 3885 3886 _draining_satb_buffers = false; 3887 3888 assert(has_aborted() || 3889 concurrent() || 3890 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3891 3892 if (G1CollectedHeap::use_parallel_gc_threads()) { 3893 satb_mq_set.set_par_closure(_worker_id, NULL); 3894 } else { 3895 satb_mq_set.set_closure(NULL); 3896 } 3897 3898 // again, this was a potentially expensive operation, decrease the 3899 // limits to get the regular clock call early 3900 decrease_limits(); 3901 } 3902 3903 void CMTask::print_stats() { 3904 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3905 _worker_id, _calls); 3906 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3907 _elapsed_time_ms, _termination_time_ms); 3908 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3909 _step_times_ms.num(), _step_times_ms.avg(), 3910 _step_times_ms.sd()); 3911 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3912 _step_times_ms.maximum(), _step_times_ms.sum()); 3913 3914 #if _MARKING_STATS_ 3915 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3916 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3917 _all_clock_intervals_ms.sd()); 3918 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3919 _all_clock_intervals_ms.maximum(), 3920 _all_clock_intervals_ms.sum()); 3921 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3922 _clock_due_to_scanning, _clock_due_to_marking); 3923 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3924 _objs_scanned, _objs_found_on_bitmap); 3925 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3926 _local_pushes, _local_pops, _local_max_size); 3927 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3928 _global_pushes, _global_pops, _global_max_size); 3929 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3930 _global_transfers_to,_global_transfers_from); 3931 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3932 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3933 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3934 _steal_attempts, _steals); 3935 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3936 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3937 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3938 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3939 _aborted_timed_out, 
_aborted_satb, _aborted_termination);
3940 #endif // _MARKING_STATS_
3941 }
3942
3943 /*****************************************************************************
3944
3945 The do_marking_step(time_target_ms, ...) method is the building
3946 block of the parallel marking framework. It can be called in parallel
3947 with other invocations of do_marking_step() on different tasks
3948 (but only one per task, obviously) and concurrently with the
3949 mutator threads, or during remark, hence it eliminates the need
3950 for two versions of the code. When called during remark, it will
3951 pick up from where the task left off during the concurrent marking
3952 phase. Interestingly, tasks are also claimable during evacuation
3953 pauses, since do_marking_step() ensures that it aborts before
3954 it needs to yield.
3955
3956 The data structures that it uses to do marking work are the
3957 following:
3958
3959 (1) Marking Bitmap. If there are gray objects that appear only
3960 on the bitmap (this happens either when dealing with an overflow
3961 or when the initial marking phase has simply marked the roots
3962 and didn't push them on the stack), then tasks claim heap
3963 regions whose bitmap they then scan to find gray objects. A
3964 global finger indicates where the end of the last claimed region
3965 is. A local finger indicates how far into the region a task has
3966 scanned. The two fingers are used to determine how to gray an
3967 object (i.e. whether simply marking it is OK, as it will be
3968 visited by a task in the future, or whether it also needs to be
3969 pushed on a stack).
3970
3971 (2) Local Queue. The local queue of the task, which the task can
3972 access reasonably efficiently. Other tasks can steal from
3973 it when they run out of work. Throughout the marking phase, a
3974 task attempts to keep its local queue short but not totally
3975 empty, so that entries are available for stealing by other
3976 tasks. Only when there is no more work will a task totally
3977 drain its local queue.
3978
3979 (3) Global Mark Stack. This handles local queue overflow. During
3980 marking only sets of entries are moved between it and the local
3981 queues, as access to it requires a mutex, and finer-grained
3982 interaction with it might cause contention. If it
3983 overflows, then the marking phase should restart and iterate
3984 over the bitmap to identify gray objects. Throughout the marking
3985 phase, tasks attempt to keep the global mark stack at a small
3986 length but not totally empty, so that entries are available for
3987 popping by other tasks. Only when there is no more work will
3988 tasks totally drain the global mark stack.
3989
3990 (4) SATB Buffer Queue. This is where completed SATB buffers are
3991 made available. Buffers are regularly removed from this queue
3992 and scanned for roots, so that the queue doesn't get too
3993 long. During remark, all completed buffers are processed, as
3994 well as the filled-in parts of any uncompleted buffers.
3995
3996 The do_marking_step() method tries to abort when the time target
3997 has been reached. There are a few other cases when the
3998 do_marking_step() method also aborts:
3999
4000 (1) When the marking phase has been aborted (after a Full GC).
4001
4002 (2) When a global overflow (on the global stack) has been
4003 triggered. Before the task aborts, it will actually sync up with
4004 the other tasks to ensure that all the marking data structures
4005 (local queues, stacks, fingers etc.)
are re-initialized so that
4006 when do_marking_step() completes, the marking phase can
4007 immediately restart.
4008
4009 (3) When enough completed SATB buffers are available. The
4010 do_marking_step() method only tries to drain SATB buffers right
4011 at the beginning. So, if enough buffers are available, the
4012 marking step aborts and the SATB buffers are processed at
4013 the beginning of the next invocation.
4014
4015 (4) To yield. When we have to yield, we abort and do the yield
4016 right at the end of do_marking_step(). This saves us a lot
4017 of hassle as, by yielding, we might allow a Full GC. If this
4018 happens then objects will be compacted underneath our feet, the
4019 heap might shrink, etc. We save checking for this by just
4020 aborting and doing the yield right at the end.
4021
4022 From the above it follows that the do_marking_step() method should
4023 be called in a loop (or, otherwise, regularly) until it completes.
4024
4025 If a marking step completes without its has_aborted() flag being
4026 true, it means it has completed the current marking phase (and
4027 also all other marking tasks have done so and have all synced up).
4028
4029 A method called regular_clock_call() is invoked "regularly" (in
4030 sub ms intervals) throughout marking. It is this clock method that
4031 checks all the abort conditions which were mentioned above and
4032 decides when the task should abort. A work-based scheme is used to
4033 trigger this clock method: when the number of object words the
4034 marking phase has scanned or the number of references the marking
4035 phase has visited reaches a given limit. Additional invocations of
4036 the clock method have been planted in a few other strategic places
4037 too. The initial reason for the clock method was to avoid calling
4038 vtime too regularly, as it is quite expensive. So, once it was in
4039 place, it was natural to piggy-back all the other conditions on it
4040 too and not constantly check them throughout the code.
4041
4042 If do_termination is true then do_marking_step will enter its
4043 termination protocol.
4044
4045 The value of is_serial must be true when do_marking_step is being
4046 called serially (i.e. by the VMThread) and do_marking_step should
4047 skip any synchronization in the termination and overflow code.
4048 Examples include the serial remark code and the serial reference
4049 processing closures.
4050
4051 The value of is_serial must be false when do_marking_step is
4052 being called by any of the worker threads in a work gang.
4053 Examples include the concurrent marking code (CMMarkingTask),
4054 the MT remark code, and the MT reference processing closures.
4055
4056 *****************************************************************************/
4057
4058 void CMTask::do_marking_step(double time_target_ms,
4059 bool do_termination,
4060 bool is_serial) {
4061 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4062 assert(concurrent() == _cm->concurrent(), "they should be the same");
4063
4064 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4065 assert(_task_queues != NULL, "invariant");
4066 assert(_task_queue != NULL, "invariant");
4067 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4068
4069 assert(!_claimed,
4070 "only one thread should claim this task at any one time");
4071
4072 // OK, this doesn't safeguard against all possible scenarios, as it is
4073 // possible for two threads to set the _claimed flag at the same
4074 // time.
But it is only for debugging purposes anyway and it will 4075 // catch most problems. 4076 _claimed = true; 4077 4078 _start_time_ms = os::elapsedVTime() * 1000.0; 4079 statsOnly( _interval_start_time_ms = _start_time_ms ); 4080 4081 // If do_stealing is true then do_marking_step will attempt to 4082 // steal work from the other CMTasks. It only makes sense to 4083 // enable stealing when the termination protocol is enabled 4084 // and do_marking_step() is not being called serially. 4085 bool do_stealing = do_termination && !is_serial; 4086 4087 double diff_prediction_ms = 4088 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 4089 _time_target_ms = time_target_ms - diff_prediction_ms; 4090 4091 // set up the variables that are used in the work-based scheme to 4092 // call the regular clock method 4093 _words_scanned = 0; 4094 _refs_reached = 0; 4095 recalculate_limits(); 4096 4097 // clear all flags 4098 clear_has_aborted(); 4099 _has_timed_out = false; 4100 _draining_satb_buffers = false; 4101 4102 ++_calls; 4103 4104 if (_cm->verbose_low()) { 4105 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, " 4106 "target = %1.2lfms >>>>>>>>>>", 4107 _worker_id, _calls, _time_target_ms); 4108 } 4109 4110 // Set up the bitmap and oop closures. Anything that uses them is 4111 // eventually called from this method, so it is OK to allocate these 4112 // statically. 4113 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 4114 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 4115 set_cm_oop_closure(&cm_oop_closure); 4116 4117 if (_cm->has_overflown()) { 4118 // This can happen if the mark stack overflows during a GC pause 4119 // and this task, after a yield point, restarts. We have to abort 4120 // as we need to get into the overflow protocol which happens 4121 // right at the end of this task. 4122 set_has_aborted(); 4123 } 4124 4125 // First drain any available SATB buffers. After this, we will not 4126 // look at SATB buffers before the next invocation of this method. 4127 // If enough completed SATB buffers are queued up, the regular clock 4128 // will abort this task so that it restarts. 4129 drain_satb_buffers(); 4130 // ...then partially drain the local queue and the global stack 4131 drain_local_queue(true); 4132 drain_global_stack(true); 4133 4134 do { 4135 if (!has_aborted() && _curr_region != NULL) { 4136 // This means that we're already holding on to a region. 4137 assert(_finger != NULL, "if region is not NULL, then the finger " 4138 "should not be NULL either"); 4139 4140 // We might have restarted this task after an evacuation pause 4141 // which might have evacuated the region we're holding on to 4142 // underneath our feet. Let's read its limit again to make sure 4143 // that we do not iterate over a region of the heap that 4144 // contains garbage (update_region_limit() will also move 4145 // _finger to the start of the region if it is found empty). 4146 update_region_limit(); 4147 // We will start from _finger not from the start of the region, 4148 // as we might be restarting this task after aborting half-way 4149 // through scanning this region. In this case, _finger points to 4150 // the address where we last found a marked object. If this is a 4151 // fresh region, _finger points to start(). 
4152 MemRegion mr = MemRegion(_finger, _region_limit); 4153 4154 if (_cm->verbose_low()) { 4155 gclog_or_tty->print_cr("[%u] we're scanning part " 4156 "["PTR_FORMAT", "PTR_FORMAT") " 4157 "of region "HR_FORMAT, 4158 _worker_id, _finger, _region_limit, 4159 HR_FORMAT_PARAMS(_curr_region)); 4160 } 4161 4162 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4163 "humongous regions should go around loop once only"); 4164 4165 // Some special cases: 4166 // If the memory region is empty, we can just give up the region. 4167 // If the current region is humongous then we only need to check 4168 // the bitmap for the bit associated with the start of the object, 4169 // scan the object if it's live, and give up the region. 4170 // Otherwise, let's iterate over the bitmap of the part of the region 4171 // that is left. 4172 // If the iteration is successful, give up the region. 4173 if (mr.is_empty()) { 4174 giveup_current_region(); 4175 regular_clock_call(); 4176 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4177 if (_nextMarkBitMap->isMarked(mr.start())) { 4178 // The object is marked - apply the closure 4179 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4180 bitmap_closure.do_bit(offset); 4181 } 4182 // Even if this task aborted while scanning the humongous object 4183 // we can (and should) give up the current region. 4184 giveup_current_region(); 4185 regular_clock_call(); 4186 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4187 giveup_current_region(); 4188 regular_clock_call(); 4189 } else { 4190 assert(has_aborted(), "currently the only way to do so"); 4191 // The only way to abort the bitmap iteration is to return 4192 // false from the do_bit() method. However, inside the 4193 // do_bit() method we move the _finger to point to the 4194 // object currently being looked at. So, if we bail out, we 4195 // have definitely set _finger to something non-null. 4196 assert(_finger != NULL, "invariant"); 4197 4198 // Region iteration was actually aborted. So now _finger 4199 // points to the address of the object we last scanned. If we 4200 // leave it there, when we restart this task, we will rescan 4201 // the object. It is easy to avoid this. We move the finger by 4202 // enough to point to the next possible object header (the 4203 // bitmap knows by how much we need to move it as it knows its 4204 // granularity). 4205 assert(_finger < _region_limit, "invariant"); 4206 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4207 // Check if bitmap iteration was aborted while scanning the last object 4208 if (new_finger >= _region_limit) { 4209 giveup_current_region(); 4210 } else { 4211 move_finger_to(new_finger); 4212 } 4213 } 4214 } 4215 // At this point we have either completed iterating over the 4216 // region we were holding on to, or we have aborted. 4217 4218 // We then partially drain the local queue and the global stack. 4219 // (Do we really need this?) 4220 drain_local_queue(true); 4221 drain_global_stack(true); 4222 4223 // Read the note on the claim_region() method on why it might 4224 // return NULL with potentially more regions available for 4225 // claiming and why we have to check out_of_regions() to determine 4226 // whether we're done or not. 4227 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4228 // We are going to try to claim a new region. We should have 4229 // given up on the previous one. 
4230 // Separated the asserts so that we know which one fires. 4231 assert(_curr_region == NULL, "invariant"); 4232 assert(_finger == NULL, "invariant"); 4233 assert(_region_limit == NULL, "invariant"); 4234 if (_cm->verbose_low()) { 4235 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4236 } 4237 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4238 if (claimed_region != NULL) { 4239 // Yes, we managed to claim one 4240 statsOnly( ++_regions_claimed ); 4241 4242 if (_cm->verbose_low()) { 4243 gclog_or_tty->print_cr("[%u] we successfully claimed " 4244 "region "PTR_FORMAT, 4245 _worker_id, claimed_region); 4246 } 4247 4248 setup_for_region(claimed_region); 4249 assert(_curr_region == claimed_region, "invariant"); 4250 } 4251 // It is important to call the regular clock here. It might take 4252 // a while to claim a region if, for example, we hit a large 4253 // block of empty regions. So we need to call the regular clock 4254 // method once round the loop to make sure it's called 4255 // frequently enough. 4256 regular_clock_call(); 4257 } 4258 4259 if (!has_aborted() && _curr_region == NULL) { 4260 assert(_cm->out_of_regions(), 4261 "at this point we should be out of regions"); 4262 } 4263 } while ( _curr_region != NULL && !has_aborted()); 4264 4265 if (!has_aborted()) { 4266 // We cannot check whether the global stack is empty, since other 4267 // tasks might be pushing objects to it concurrently. 4268 assert(_cm->out_of_regions(), 4269 "at this point we should be out of regions"); 4270 4271 if (_cm->verbose_low()) { 4272 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4273 } 4274 4275 // Try to reduce the number of available SATB buffers so that 4276 // remark has less work to do. 4277 drain_satb_buffers(); 4278 } 4279 4280 // Since we've done everything else, we can now totally drain the 4281 // local queue and global stack. 4282 drain_local_queue(false); 4283 drain_global_stack(false); 4284 4285 // Attempt at work stealing from other task's queues. 4286 if (do_stealing && !has_aborted()) { 4287 // We have not aborted. This means that we have finished all that 4288 // we could. Let's try to do some stealing... 4289 4290 // We cannot check whether the global stack is empty, since other 4291 // tasks might be pushing objects to it concurrently. 4292 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4293 "only way to reach here"); 4294 4295 if (_cm->verbose_low()) { 4296 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4297 } 4298 4299 while (!has_aborted()) { 4300 oop obj; 4301 statsOnly( ++_steal_attempts ); 4302 4303 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4304 if (_cm->verbose_medium()) { 4305 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4306 _worker_id, (void*) obj); 4307 } 4308 4309 statsOnly( ++_steals ); 4310 4311 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4312 "any stolen object should be marked"); 4313 scan_object(obj); 4314 4315 // And since we're towards the end, let's totally drain the 4316 // local queue and global stack. 4317 drain_local_queue(false); 4318 drain_global_stack(false); 4319 } else { 4320 break; 4321 } 4322 } 4323 } 4324 4325 // If we are about to wrap up and go into termination, check if we 4326 // should raise the overflow flag. 4327 if (do_termination && !has_aborted()) { 4328 if (_cm->force_overflow()->should_force()) { 4329 _cm->set_has_overflown(); 4330 regular_clock_call(); 4331 } 4332 } 4333 4334 // We still haven't aborted. 
Now, let's try to get into the 4335 // termination protocol. 4336 if (do_termination && !has_aborted()) { 4337 // We cannot check whether the global stack is empty, since other 4338 // tasks might be concurrently pushing objects on it. 4339 // Separated the asserts so that we know which one fires. 4340 assert(_cm->out_of_regions(), "only way to reach here"); 4341 assert(_task_queue->size() == 0, "only way to reach here"); 4342 4343 if (_cm->verbose_low()) { 4344 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4345 } 4346 4347 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4348 4349 // The CMTask class also extends the TerminatorTerminator class, 4350 // hence its should_exit_termination() method will also decide 4351 // whether to exit the termination protocol or not. 4352 bool finished = (is_serial || 4353 _cm->terminator()->offer_termination(this)); 4354 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4355 _termination_time_ms += 4356 termination_end_time_ms - _termination_start_time_ms; 4357 4358 if (finished) { 4359 // We're all done. 4360 4361 if (_worker_id == 0) { 4362 // let's allow task 0 to do this 4363 if (concurrent()) { 4364 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4365 // we need to set this to false before the next 4366 // safepoint. This way we ensure that the marking phase 4367 // doesn't observe any more heap expansions. 4368 _cm->clear_concurrent_marking_in_progress(); 4369 } 4370 } 4371 4372 // We can now guarantee that the global stack is empty, since 4373 // all other tasks have finished. We separated the guarantees so 4374 // that, if a condition is false, we can immediately find out 4375 // which one. 4376 guarantee(_cm->out_of_regions(), "only way to reach here"); 4377 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4378 guarantee(_task_queue->size() == 0, "only way to reach here"); 4379 guarantee(!_cm->has_overflown(), "only way to reach here"); 4380 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4381 4382 if (_cm->verbose_low()) { 4383 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4384 } 4385 } else { 4386 // Apparently there's more work to do. Let's abort this task. It 4387 // will restart it and we can hopefully find more things to do. 4388 4389 if (_cm->verbose_low()) { 4390 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4391 _worker_id); 4392 } 4393 4394 set_has_aborted(); 4395 statsOnly( ++_aborted_termination ); 4396 } 4397 } 4398 4399 // Mainly for debugging purposes to make sure that a pointer to the 4400 // closure which was statically allocated in this frame doesn't 4401 // escape it by accident. 4402 set_cm_oop_closure(NULL); 4403 double end_time_ms = os::elapsedVTime() * 1000.0; 4404 double elapsed_time_ms = end_time_ms - _start_time_ms; 4405 // Update the step history. 4406 _step_times_ms.add(elapsed_time_ms); 4407 4408 if (has_aborted()) { 4409 // The task was aborted for some reason. 4410 4411 statsOnly( ++_aborted ); 4412 4413 if (_has_timed_out) { 4414 double diff_ms = elapsed_time_ms - _time_target_ms; 4415 // Keep statistics of how well we did with respect to hitting 4416 // our target only if we actually timed out (if we aborted for 4417 // other reasons, then the results might get skewed). 4418 _marking_step_diffs_ms.add(diff_ms); 4419 } 4420 4421 if (_cm->has_overflown()) { 4422 // This is the interesting one. We aborted because a global 4423 // overflow was raised. 
This means we have to restart the 4424 // marking phase and start iterating over regions. However, in 4425 // order to do this we have to make sure that all tasks stop 4426 // what they are doing and re-initialize in a safe manner. We 4427 // will achieve this with the use of two barrier sync points. 4428 4429 if (_cm->verbose_low()) { 4430 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4431 } 4432 4433 if (!is_serial) { 4434 // We only need to enter the sync barrier if being called 4435 // from a parallel context 4436 _cm->enter_first_sync_barrier(_worker_id); 4437 4438 // When we exit this sync barrier we know that all tasks have 4439 // stopped doing marking work. So, it's now safe to 4440 // re-initialize our data structures. At the end of this method, 4441 // task 0 will clear the global data structures. 4442 } 4443 4444 statsOnly( ++_aborted_overflow ); 4445 4446 // We clear the local state of this task... 4447 clear_region_fields(); 4448 4449 if (!is_serial) { 4450 // ...and enter the second barrier. 4451 _cm->enter_second_sync_barrier(_worker_id); 4452 } 4453 // At this point, if we're during the concurrent phase of 4454 // marking, everything has been re-initialized and we're 4455 // ready to restart. 4456 } 4457 4458 if (_cm->verbose_low()) { 4459 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4460 "elapsed = %1.2lfms <<<<<<<<<<", 4461 _worker_id, _time_target_ms, elapsed_time_ms); 4462 if (_cm->has_aborted()) { 4463 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4464 _worker_id); 4465 } 4466 } 4467 } else { 4468 if (_cm->verbose_low()) { 4469 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4470 "elapsed = %1.2lfms <<<<<<<<<<", 4471 _worker_id, _time_target_ms, elapsed_time_ms); 4472 } 4473 } 4474 4475 _claimed = false; 4476 } 4477 4478 CMTask::CMTask(uint worker_id, 4479 ConcurrentMark* cm, 4480 size_t* marked_bytes, 4481 BitMap* card_bm, 4482 CMTaskQueue* task_queue, 4483 CMTaskQueueSet* task_queues) 4484 : _g1h(G1CollectedHeap::heap()), 4485 _worker_id(worker_id), _cm(cm), 4486 _claimed(false), 4487 _nextMarkBitMap(NULL), _hash_seed(17), 4488 _task_queue(task_queue), 4489 _task_queues(task_queues), 4490 _cm_oop_closure(NULL), 4491 _marked_bytes_array(marked_bytes), 4492 _card_bm(card_bm) { 4493 guarantee(task_queue != NULL, "invariant"); 4494 guarantee(task_queues != NULL, "invariant"); 4495 4496 statsOnly( _clock_due_to_scanning = 0; 4497 _clock_due_to_marking = 0 ); 4498 4499 _marking_step_diffs_ms.add(0.5); 4500 } 4501 4502 // These are formatting macros that are used below to ensure 4503 // consistent formatting. The *_H_* versions are used to format the 4504 // header for a particular value and they should be kept consistent 4505 // with the corresponding macro. Also note that most of the macros add 4506 // the necessary white space (as a prefix) which makes them a bit 4507 // easier to compose. 4508 4509 // All the output lines are prefixed with this string to be able to 4510 // identify them easily in a large log file. 
4511 #define G1PPRL_LINE_PREFIX "###" 4512 4513 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4514 #ifdef _LP64 4515 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4516 #else // _LP64 4517 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4518 #endif // _LP64 4519 4520 // For per-region info 4521 #define G1PPRL_TYPE_FORMAT " %-4s" 4522 #define G1PPRL_TYPE_H_FORMAT " %4s" 4523 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4524 #define G1PPRL_BYTE_H_FORMAT " %9s" 4525 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4526 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4527 4528 // For summary info 4529 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4530 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4531 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4532 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4533 4534 G1PrintRegionLivenessInfoClosure:: 4535 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4536 : _out(out), 4537 _total_used_bytes(0), _total_capacity_bytes(0), 4538 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4539 _hum_used_bytes(0), _hum_capacity_bytes(0), 4540 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4541 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4542 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4543 MemRegion g1_committed = g1h->g1_committed(); 4544 MemRegion g1_reserved = g1h->g1_reserved(); 4545 double now = os::elapsedTime(); 4546 4547 // Print the header of the output. 4548 _out->cr(); 4549 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 4550 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 4551 G1PPRL_SUM_ADDR_FORMAT("committed") 4552 G1PPRL_SUM_ADDR_FORMAT("reserved") 4553 G1PPRL_SUM_BYTE_FORMAT("region-size"), 4554 g1_committed.start(), g1_committed.end(), 4555 g1_reserved.start(), g1_reserved.end(), 4556 HeapRegion::GrainBytes); 4557 _out->print_cr(G1PPRL_LINE_PREFIX); 4558 _out->print_cr(G1PPRL_LINE_PREFIX 4559 G1PPRL_TYPE_H_FORMAT 4560 G1PPRL_ADDR_BASE_H_FORMAT 4561 G1PPRL_BYTE_H_FORMAT 4562 G1PPRL_BYTE_H_FORMAT 4563 G1PPRL_BYTE_H_FORMAT 4564 G1PPRL_DOUBLE_H_FORMAT 4565 G1PPRL_BYTE_H_FORMAT 4566 G1PPRL_BYTE_H_FORMAT, 4567 "type", "address-range", 4568 "used", "prev-live", "next-live", "gc-eff", 4569 "remset", "code-roots"); 4570 _out->print_cr(G1PPRL_LINE_PREFIX 4571 G1PPRL_TYPE_H_FORMAT 4572 G1PPRL_ADDR_BASE_H_FORMAT 4573 G1PPRL_BYTE_H_FORMAT 4574 G1PPRL_BYTE_H_FORMAT 4575 G1PPRL_BYTE_H_FORMAT 4576 G1PPRL_DOUBLE_H_FORMAT 4577 G1PPRL_BYTE_H_FORMAT 4578 G1PPRL_BYTE_H_FORMAT, 4579 "", "", 4580 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 4581 "(bytes)", "(bytes)"); 4582 } 4583 4584 // It takes as a parameter a reference to one of the _hum_* fields, it 4585 // deduces the corresponding value for a region in a humongous region 4586 // series (either the region size, or what's left if the _hum_* field 4587 // is < the region size), and updates the _hum_* field accordingly. 4588 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 4589 size_t bytes = 0; 4590 // The > 0 check is to deal with the prev and next live bytes which 4591 // could be 0. 4592 if (*hum_bytes > 0) { 4593 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes); 4594 *hum_bytes -= bytes; 4595 } 4596 return bytes; 4597 } 4598 4599 // It deduces the values for a region in a humongous region series 4600 // from the _hum_* fields and updates those accordingly. 
It assumes
4601 // that the _hum_* fields have already been set up from the "starts
4602 // humongous" region and that we visit the regions in address order.
4603 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4604 size_t* capacity_bytes,
4605 size_t* prev_live_bytes,
4606 size_t* next_live_bytes) {
4607 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4608 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4609 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4610 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4611 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4612 }
4613
4614 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4615 const char* type = "";
4616 HeapWord* bottom = r->bottom();
4617 HeapWord* end = r->end();
4618 size_t capacity_bytes = r->capacity();
4619 size_t used_bytes = r->used();
4620 size_t prev_live_bytes = r->live_bytes();
4621 size_t next_live_bytes = r->next_live_bytes();
4622 double gc_eff = r->gc_efficiency();
4623 size_t remset_bytes = r->rem_set()->mem_size();
4624 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4625
4626 if (r->used() == 0) {
4627 type = "FREE";
4628 } else if (r->is_survivor()) {
4629 type = "SURV";
4630 } else if (r->is_young()) {
4631 type = "EDEN";
4632 } else if (r->startsHumongous()) {
4633 type = "HUMS";
4634
4635 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4636 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4637 "they should have been zeroed after the last time we used them");
4638 // Set up the _hum_* fields.
4639 _hum_capacity_bytes = capacity_bytes;
4640 _hum_used_bytes = used_bytes;
4641 _hum_prev_live_bytes = prev_live_bytes;
4642 _hum_next_live_bytes = next_live_bytes;
4643 get_hum_bytes(&used_bytes, &capacity_bytes,
4644 &prev_live_bytes, &next_live_bytes);
4645 end = bottom + HeapRegion::GrainWords;
4646 } else if (r->continuesHumongous()) {
4647 type = "HUMC";
4648 get_hum_bytes(&used_bytes, &capacity_bytes,
4649 &prev_live_bytes, &next_live_bytes);
4650 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4651 } else {
4652 type = "OLD";
4653 }
4654
4655 _total_used_bytes += used_bytes;
4656 _total_capacity_bytes += capacity_bytes;
4657 _total_prev_live_bytes += prev_live_bytes;
4658 _total_next_live_bytes += next_live_bytes;
4659 _total_remset_bytes += remset_bytes;
4660 _total_strong_code_roots_bytes += strong_code_roots_bytes;
4661
4662 // Print a line for this particular region.
4663 _out->print_cr(G1PPRL_LINE_PREFIX
4664 G1PPRL_TYPE_FORMAT
4665 G1PPRL_ADDR_BASE_FORMAT
4666 G1PPRL_BYTE_FORMAT
4667 G1PPRL_BYTE_FORMAT
4668 G1PPRL_BYTE_FORMAT
4669 G1PPRL_DOUBLE_FORMAT
4670 G1PPRL_BYTE_FORMAT
4671 G1PPRL_BYTE_FORMAT,
4672 type, bottom, end,
4673 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4674 remset_bytes, strong_code_roots_bytes);
4675
4676 return false;
4677 }
4678
4679 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4680 // Add static memory usage to the remembered set sizes.
4681 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4682 // Print the footer of the output.
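// [Illustrative aside] The summary line below reports totals in MB plus
// a percentage of capacity. A tiny model of that formatting math
// (hypothetical helper names; the real bytes_to_mb()/perc() helpers live
// elsewhere in HotSpot, so this is a sketch, not the actual code):
#if 0
#include <cstdio>

static double bytes_to_mb_model(double bytes) {
  return bytes / (1024.0 * 1024.0);
}

static double perc_model(double part, double total) {
  return total > 0.0 ? (part / total) * 100.0 : 0.0;
}

int main() {
  double capacity_bytes = 512.0 * 1024.0 * 1024.0;  // example: 512 MB heap
  double used_bytes     = 128.0 * 1024.0 * 1024.0;  // example: 128 MB used
  // Mirrors G1PPRL_SUM_MB_PERC_FORMAT("used"): "used: 128.00 MB / 25.00 %"
  std::printf(" used: %1.2f MB / %1.2f %%\n",
              bytes_to_mb_model(used_bytes),
              perc_model(used_bytes, capacity_bytes));
  return 0;
}
#endif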
4683 _out->print_cr(G1PPRL_LINE_PREFIX); 4684 _out->print_cr(G1PPRL_LINE_PREFIX 4685 " SUMMARY" 4686 G1PPRL_SUM_MB_FORMAT("capacity") 4687 G1PPRL_SUM_MB_PERC_FORMAT("used") 4688 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4689 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4690 G1PPRL_SUM_MB_FORMAT("remset") 4691 G1PPRL_SUM_MB_FORMAT("code-roots"), 4692 bytes_to_mb(_total_capacity_bytes), 4693 bytes_to_mb(_total_used_bytes), 4694 perc(_total_used_bytes, _total_capacity_bytes), 4695 bytes_to_mb(_total_prev_live_bytes), 4696 perc(_total_prev_live_bytes, _total_capacity_bytes), 4697 bytes_to_mb(_total_next_live_bytes), 4698 perc(_total_next_live_bytes, _total_capacity_bytes), 4699 bytes_to_mb(_total_remset_bytes), 4700 bytes_to_mb(_total_strong_code_roots_bytes)); 4701 _out->cr(); 4702 }
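
// [Illustrative aside] As the long comment above do_marking_step()
// explains, the method is meant to be invoked in a loop until it
// completes without aborting. A minimal sketch of such a driver loop
// (the 10ms target is an arbitrary example value; this is not how
// CMConcurrentMarkingTask is actually written):
#if 0
void drive_marking(CMTask* task) {
  do {
    // do_termination=true, is_serial=false corresponds to a worker
    // thread in a work gang; the real callers derive the time target
    // from G1 policy rather than hard-coding it.
    task->do_marking_step(10.0,
                          true  /* do_termination */,
                          false /* is_serial */);
    // When do_marking_step() aborts (yield request, overflow, pending
    // SATB buffers, or time-out), the caller is responsible for
    // yielding if requested and then calling it again.
  } while (task->has_aborted());
}
#endif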