/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
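  // (Example: each bitmap bit covers HeapWordSize << _shifter bytes, so
  // with _shifter == 0 addr is rounded up to the next heap word, and with
  // _shifter == 3 to the next 8-word boundary.)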
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size()>>LogHeapWordSize;
}
#endif

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
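  // Sizing note: the bitmap needs one bit per 2^_shifter heap words, i.e.
  // (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes (the "+ 1" above
  // covers rounding). For example, a 1G heap with 8-byte heap words and
  // _shifter == 0 needs 128M bits, i.e. a 16M backing store.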
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
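  // Expansion doubles the capacity, up to MarkStackSizeMax. Note that the
  // new space is reserved before the old backing store is released, so if
  // the reservation fails we simply keep the current, smaller stack.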
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
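  // We hold ParGCRareEvent_lock here, so unlike par_push() and
  // par_adjoin_arr() no CAS retry loop is needed; a plain bump of _index
  // claims the n slots.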
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
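  // Double-checked locking: _next_survivor is first read outside the lock;
  // if it looks claimable we take RootRegionScan_lock and re-read it, so
  // each root region is handed out to exactly one worker.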
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(MinObjAlignment - 1),
  _markBitMap2(MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
            "than ParallelGCThreads (" UINT32_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
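      // (scale_parallel_threads() is MAX2((n + 2) / 4, 1U), so e.g.
      // ParallelGCThreads == 8 yields 2 marking threads and 13 yields 3,
      // with a floor of one thread.)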
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
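    // We only range-check a value the user set explicitly: against the
    // default MarkStackSizeMax, or against a MarkStackSizeMax that was
    // itself specified on the command line.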
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended by a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // If we're in the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we
  // will see assertion failures from any subsequent set_phase() calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(concurrent() /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_stealing    */,
                                  true  /* do_termination */,
                                  false /* is_serial      */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
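// It walks the next marking bitmap from bottom() to NTAMS, sums the sizes
// of the marked objects it finds, and sets the card bitmap bits spanned by
// each live object; the result is later compared against the counting data
// accumulated during marking.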
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
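    // Note that the reverse (actual set, expected clear) is tolerated: the
    // check is one-sided, only catching liveness the bitmap walk found but
    // the counting data missed.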
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};


class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
1634 if (G1CollectedHeap::use_parallel_gc_threads()) { 1635 assert( _g1h->workers()->active_workers() > 0, 1636 "Should have been previously set"); 1637 _n_workers = _g1h->workers()->active_workers(); 1638 } else { 1639 _n_workers = 1; 1640 } 1641 1642 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1643 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1644 1645 _verbose = _cm->verbose_medium(); 1646 } 1647 1648 void work(uint worker_id) { 1649 assert(worker_id < _n_workers, "invariant"); 1650 1651 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1652 _actual_region_bm, _actual_card_bm, 1653 _expected_region_bm, 1654 _expected_card_bm, 1655 _verbose); 1656 1657 if (G1CollectedHeap::use_parallel_gc_threads()) { 1658 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1659 worker_id, 1660 _n_workers, 1661 HeapRegion::VerifyCountClaimValue); 1662 } else { 1663 _g1h->heap_region_iterate(&verify_cl); 1664 } 1665 1666 Atomic::add(verify_cl.failures(), &_failures); 1667 } 1668 1669 int failures() const { return _failures; } 1670 }; 1671 1672 // Closure that finalizes the liveness counting data. 1673 // Used during the cleanup pause. 1674 // Sets the bits corresponding to the interval [NTAMS, top] 1675 // (which contains the implicitly live objects) in the 1676 // card liveness bitmap. Also sets the bit for each region, 1677 // containing live data, in the region liveness bitmap. 1678 1679 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1680 public: 1681 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1682 BitMap* region_bm, 1683 BitMap* card_bm) : 1684 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1685 1686 bool doHeapRegion(HeapRegion* hr) { 1687 1688 if (hr->continuesHumongous()) { 1689 // We will ignore these here and process them when their 1690 // associated "starts humongous" region is processed (see 1691 // set_bit_for_heap_region()). Note that we cannot rely on their 1692 // associated "starts humongous" region to have their bit set to 1693 // 1 since, due to the region chunking in the parallel region 1694 // iteration, a "continues humongous" region might be visited 1695 // before its associated "starts humongous". 1696 return false; 1697 } 1698 1699 HeapWord* ntams = hr->next_top_at_mark_start(); 1700 HeapWord* top = hr->top(); 1701 1702 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1703 1704 // Mark the allocated-since-marking portion... 1705 if (ntams < top) { 1706 // This definitely means the region has live objects. 1707 set_bit_for_region(hr); 1708 1709 // Now set the bits in the card bitmap for [ntams, top) 1710 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1711 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1712 1713 // Note: if we're looking at the last region in heap - top 1714 // could be actually just beyond the end of the heap; end_idx 1715 // will then correspond to a (non-existent) card that is also 1716 // just beyond the heap. 
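      // As an illustration, assuming the usual 512 byte cards
      // (1 << CardTableModRefBS::card_shift): with ntams at 0x1000 and
      // top at 0x1300, start_idx names the card [0x1000, 0x1200) and
      // end_idx the card [0x1200, 0x1400); since top is not card
      // aligned, the increment below extends the exclusive range
      // [start_idx, end_idx) to also cover that last, partially
      // filled card.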
1717 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1718 // end of object is not card aligned - increment to cover 1719 // all the cards spanned by the object 1720 end_idx += 1; 1721 } 1722 1723 assert(end_idx <= _card_bm->size(), 1724 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1725 end_idx, _card_bm->size())); 1726 assert(start_idx < _card_bm->size(), 1727 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1728 start_idx, _card_bm->size())); 1729 1730 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1731 } 1732 1733 // Set the bit for the region if it contains live data 1734 if (hr->next_marked_bytes() > 0) { 1735 set_bit_for_region(hr); 1736 } 1737 1738 return false; 1739 } 1740 }; 1741 1742 class G1ParFinalCountTask: public AbstractGangTask { 1743 protected: 1744 G1CollectedHeap* _g1h; 1745 ConcurrentMark* _cm; 1746 BitMap* _actual_region_bm; 1747 BitMap* _actual_card_bm; 1748 1749 uint _n_workers; 1750 1751 public: 1752 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1753 : AbstractGangTask("G1 final counting"), 1754 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1755 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1756 _n_workers(0) { 1757 // Use the value already set as the number of active threads 1758 // in the call to run_task(). 1759 if (G1CollectedHeap::use_parallel_gc_threads()) { 1760 assert( _g1h->workers()->active_workers() > 0, 1761 "Should have been previously set"); 1762 _n_workers = _g1h->workers()->active_workers(); 1763 } else { 1764 _n_workers = 1; 1765 } 1766 } 1767 1768 void work(uint worker_id) { 1769 assert(worker_id < _n_workers, "invariant"); 1770 1771 FinalCountDataUpdateClosure final_update_cl(_g1h, 1772 _actual_region_bm, 1773 _actual_card_bm); 1774 1775 if (G1CollectedHeap::use_parallel_gc_threads()) { 1776 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1777 worker_id, 1778 _n_workers, 1779 HeapRegion::FinalCountClaimValue); 1780 } else { 1781 _g1h->heap_region_iterate(&final_update_cl); 1782 } 1783 } 1784 }; 1785 1786 class G1ParNoteEndTask; 1787 1788 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1789 G1CollectedHeap* _g1; 1790 int _worker_num; 1791 size_t _max_live_bytes; 1792 uint _regions_claimed; 1793 size_t _freed_bytes; 1794 FreeRegionList* _local_cleanup_list; 1795 OldRegionSet* _old_proxy_set; 1796 HumongousRegionSet* _humongous_proxy_set; 1797 HRRSCleanupTask* _hrrs_cleanup_task; 1798 double _claimed_region_time; 1799 double _max_region_time; 1800 1801 public: 1802 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1803 int worker_num, 1804 FreeRegionList* local_cleanup_list, 1805 OldRegionSet* old_proxy_set, 1806 HumongousRegionSet* humongous_proxy_set, 1807 HRRSCleanupTask* hrrs_cleanup_task) : 1808 _g1(g1), _worker_num(worker_num), 1809 _max_live_bytes(0), _regions_claimed(0), 1810 _freed_bytes(0), 1811 _claimed_region_time(0.0), _max_region_time(0.0), 1812 _local_cleanup_list(local_cleanup_list), 1813 _old_proxy_set(old_proxy_set), 1814 _humongous_proxy_set(humongous_proxy_set), 1815 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1816 1817 size_t freed_bytes() { return _freed_bytes; } 1818 1819 bool doHeapRegion(HeapRegion *hr) { 1820 if (hr->continuesHumongous()) { 1821 return false; 1822 } 1823 // We use a claim value of zero here because all regions 1824 // were claimed with value 1 in the FinalCount task. 
1825 _g1->reset_gc_time_stamps(hr); 1826 double start = os::elapsedTime(); 1827 _regions_claimed++; 1828 hr->note_end_of_marking(); 1829 _max_live_bytes += hr->max_live_bytes(); 1830 _g1->free_region_if_empty(hr, 1831 &_freed_bytes, 1832 _local_cleanup_list, 1833 _old_proxy_set, 1834 _humongous_proxy_set, 1835 _hrrs_cleanup_task, 1836 true /* par */); 1837 double region_time = (os::elapsedTime() - start); 1838 _claimed_region_time += region_time; 1839 if (region_time > _max_region_time) { 1840 _max_region_time = region_time; 1841 } 1842 return false; 1843 } 1844 1845 size_t max_live_bytes() { return _max_live_bytes; } 1846 uint regions_claimed() { return _regions_claimed; } 1847 double claimed_region_time_sec() { return _claimed_region_time; } 1848 double max_region_time_sec() { return _max_region_time; } 1849 }; 1850 1851 class G1ParNoteEndTask: public AbstractGangTask { 1852 friend class G1NoteEndOfConcMarkClosure; 1853 1854 protected: 1855 G1CollectedHeap* _g1h; 1856 size_t _max_live_bytes; 1857 size_t _freed_bytes; 1858 FreeRegionList* _cleanup_list; 1859 1860 public: 1861 G1ParNoteEndTask(G1CollectedHeap* g1h, 1862 FreeRegionList* cleanup_list) : 1863 AbstractGangTask("G1 note end"), _g1h(g1h), 1864 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1865 1866 void work(uint worker_id) { 1867 double start = os::elapsedTime(); 1868 FreeRegionList local_cleanup_list("Local Cleanup List"); 1869 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1870 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1871 HRRSCleanupTask hrrs_cleanup_task; 1872 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1873 &old_proxy_set, 1874 &humongous_proxy_set, 1875 &hrrs_cleanup_task); 1876 if (G1CollectedHeap::use_parallel_gc_threads()) { 1877 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1878 _g1h->workers()->active_workers(), 1879 HeapRegion::NoteEndClaimValue); 1880 } else { 1881 _g1h->heap_region_iterate(&g1_note_end); 1882 } 1883 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1884 1885 // Now update the lists 1886 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1887 NULL /* free_list */, 1888 &old_proxy_set, 1889 &humongous_proxy_set, 1890 true /* par */); 1891 { 1892 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1893 _max_live_bytes += g1_note_end.max_live_bytes(); 1894 _freed_bytes += g1_note_end.freed_bytes(); 1895 1896 // If we iterate over the global cleanup list at the end of 1897 // cleanup to do this printing we will not guarantee to only 1898 // generate output for the newly-reclaimed regions (the list 1899 // might not be empty at the beginning of cleanup; we might 1900 // still be working on its previous contents). So we do the 1901 // printing here, before we append the new regions to the global 1902 // cleanup list. 
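      // At this point the local list is still owned exclusively by this
      // worker (it is only published via the add_as_tail() call further
      // down), so walking it here should be race free.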
1903 1904 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1905 if (hr_printer->is_active()) { 1906 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1907 while (iter.more_available()) { 1908 HeapRegion* hr = iter.get_next(); 1909 hr_printer->cleanup(hr); 1910 } 1911 } 1912 1913 _cleanup_list->add_as_tail(&local_cleanup_list); 1914 assert(local_cleanup_list.is_empty(), "post-condition"); 1915 1916 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1917 } 1918 } 1919 size_t max_live_bytes() { return _max_live_bytes; } 1920 size_t freed_bytes() { return _freed_bytes; } 1921 }; 1922 1923 class G1ParScrubRemSetTask: public AbstractGangTask { 1924 protected: 1925 G1RemSet* _g1rs; 1926 BitMap* _region_bm; 1927 BitMap* _card_bm; 1928 public: 1929 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1930 BitMap* region_bm, BitMap* card_bm) : 1931 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1932 _region_bm(region_bm), _card_bm(card_bm) { } 1933 1934 void work(uint worker_id) { 1935 if (G1CollectedHeap::use_parallel_gc_threads()) { 1936 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1937 HeapRegion::ScrubRemSetClaimValue); 1938 } else { 1939 _g1rs->scrub(_region_bm, _card_bm); 1940 } 1941 } 1942 1943 }; 1944 1945 void ConcurrentMark::cleanup() { 1946 // world is stopped at this checkpoint 1947 assert(SafepointSynchronize::is_at_safepoint(), 1948 "world should be stopped"); 1949 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1950 1951 // If a full collection has happened, we shouldn't do this. 1952 if (has_aborted()) { 1953 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1954 return; 1955 } 1956 1957 HRSPhaseSetter x(HRSPhaseCleanup); 1958 g1h->verify_region_sets_optional(); 1959 1960 if (VerifyDuringGC) { 1961 HandleMark hm; // handle scope 1962 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1963 Universe::heap()->prepare_for_verify(); 1964 Universe::verify(/* silent */ false, 1965 /* option */ VerifyOption_G1UsePrevMarking); 1966 } 1967 1968 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1969 g1p->record_concurrent_mark_cleanup_start(); 1970 1971 double start = os::elapsedTime(); 1972 1973 HeapRegionRemSet::reset_for_cleanup_tasks(); 1974 1975 uint n_workers; 1976 1977 // Do counting once more with the world stopped for good measure. 1978 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1979 1980 if (G1CollectedHeap::use_parallel_gc_threads()) { 1981 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1982 "sanity check"); 1983 1984 g1h->set_par_threads(); 1985 n_workers = g1h->n_par_threads(); 1986 assert(g1h->n_par_threads() == n_workers, 1987 "Should not have been reset"); 1988 g1h->workers()->run_task(&g1_par_count_task); 1989 // Done with the parallel phase so reset to 0. 1990 g1h->set_par_threads(0); 1991 1992 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1993 "sanity check"); 1994 } else { 1995 n_workers = 1; 1996 g1_par_count_task.work(0); 1997 } 1998 1999 if (VerifyDuringGC) { 2000 // Verify that the counting data accumulated during marking matches 2001 // that calculated by walking the marking bitmap. 
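    // The expected bitmaps below are rebuilt from scratch by walking the
    // (still intact) next mark bitmap; G1ParVerifyFinalCountTask then
    // compares them, bit by bit, against the actual _region_bm and
    // _card_bm that the counting and aggregation code produced.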
    // Bitmaps to hold expected values
    BitMap expected_region_bm(_region_bm.size(), false);
    BitMap expected_card_bm(_card_bm.size(), false);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
                                                 &_card_bm,
                                                 &expected_region_bm,
                                                 &expected_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_verify_task);
      // Done with the parallel phase so reset to 0.
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
             "sanity check");
    } else {
      g1_par_verify_task.work(0);
    }

    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  }

  size_t start_used_bytes = g1h->used();
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Do the remembered set scrubbing before the
  // record_concurrent_mark_cleanup_end() call below, since it affects
  // the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  g1h->verify_region_sets_optional();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference
// processing. Uses the CMTask associated with a worker thread (for serial
// reference processing the CMTask for worker 0 is used) to preserve (mark)
// and trace referent objects.
//
// Using the CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// for the workers to interfere with each other, which could occur if
// they operated on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial):
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->worker_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call CMTask::do_marking_step() to
        // process these entries.
        //
        // We call CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_stealing */,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a given worker thread (for serial
// reference processing the CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  bool _do_stealing;
  bool _do_termination;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial):
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");

    // It only makes sense to allow stealing in CMTask::do_marking_step() if
    // this closure is being instantiated for parallel reference processing.
    _do_stealing = !_is_serial;
    _do_termination = true;
  }

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - "
                               "stealing: %s, termination: %s, serial: %s",
                               _task->worker_id(),
                               BOOL_TO_STR(_do_stealing),
                               BOOL_TO_STR(_do_termination),
                               BOOL_TO_STR(_is_serial));
      }

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             _do_stealing,
                             _do_termination,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
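  // In effect this class adapts G1's WorkGang to the
  // AbstractRefProcTaskExecutor interface that the reference processor
  // expects: each execute() call wraps the given task in a gang task
  // proxy (G1CMRefProcTaskProxy or G1CMRefEnqueueTaskProxy below) and
  // runs it on the marking workers.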
2344 virtual void execute(ProcessTask& task); 2345 virtual void execute(EnqueueTask& task); 2346 }; 2347 2348 class G1CMRefProcTaskProxy: public AbstractGangTask { 2349 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2350 ProcessTask& _proc_task; 2351 G1CollectedHeap* _g1h; 2352 ConcurrentMark* _cm; 2353 2354 public: 2355 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2356 G1CollectedHeap* g1h, 2357 ConcurrentMark* cm) : 2358 AbstractGangTask("Process reference objects in parallel"), 2359 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2360 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2361 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2362 } 2363 2364 virtual void work(uint worker_id) { 2365 CMTask* marking_task = _cm->task(worker_id); 2366 G1CMIsAliveClosure g1_is_alive(_g1h); 2367 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, 2368 marking_task, 2369 false /* is_serial */); 2370 G1CMDrainMarkingStackClosure g1_par_drain(_cm, 2371 marking_task, 2372 false /* is_serial */); 2373 2374 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2375 } 2376 }; 2377 2378 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2379 assert(_workers != NULL, "Need parallel worker threads."); 2380 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2381 2382 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2383 2384 // We need to reset the number of workers in the parallel 2385 // task terminator, before each proxy task execution, so 2386 // that the termination protocol in CMTask::do_marking_step() 2387 // knows how many workers to wait for. 2388 _cm->terminator()->reset_for_reuse(_active_workers); 2389 _g1h->set_par_threads(_active_workers); 2390 _workers->run_task(&proc_task_proxy); 2391 _g1h->set_par_threads(0); 2392 } 2393 2394 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2395 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2396 EnqueueTask& _enq_task; 2397 2398 public: 2399 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2400 AbstractGangTask("Enqueue reference objects in parallel"), 2401 _enq_task(enq_task) { } 2402 2403 virtual void work(uint worker_id) { 2404 _enq_task.work(worker_id); 2405 } 2406 }; 2407 2408 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2409 assert(_workers != NULL, "Need parallel worker threads."); 2410 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2411 2412 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2413 2414 _g1h->set_par_threads(_active_workers); 2415 _workers->run_task(&enq_task_proxy); 2416 _g1h->set_par_threads(0); 2417 } 2418 2419 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2420 if (has_overflown()) { 2421 // Skip processing the discovered references if we have 2422 // overflown the global marking stack. Reference objects 2423 // only get discovered once so it is OK to not 2424 // de-populate the discovered reference lists. We could have, 2425 // but the only benefit would be that, when marking restarts, 2426 // less reference objects are discovered. 2427 return; 2428 } 2429 2430 ResourceMark rm; 2431 HandleMark hm; 2432 2433 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2434 2435 // Is alive closure. 2436 G1CMIsAliveClosure g1_is_alive(g1h); 2437 2438 // Inner scope to exclude the cleaning of the string and symbol 2439 // tables from the displayed time. 
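  // (The "GC ref-proc" TraceTime below stops at the closing brace of
  // this scope, before the StringTable/SymbolTable unlink calls.)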
2440 { 2441 if (G1Log::finer()) { 2442 gclog_or_tty->put(' '); 2443 } 2444 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty); 2445 2446 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2447 2448 // See the comment in G1CollectedHeap::ref_processing_init() 2449 // about how reference processing currently works in G1. 2450 2451 // Set the soft reference policy 2452 rp->setup_policy(clear_all_soft_refs); 2453 assert(_markStack.isEmpty(), "mark stack should be empty"); 2454 2455 // We need at least one active thread. If reference processing 2456 // is not multi-threaded we use the current (VMThread) thread, 2457 // otherwise we use the work gang from the G1CollectedHeap and 2458 // we utilize all the worker threads we can. 2459 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL; 2460 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2461 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2462 2463 // Serial (i.e executed by VMThread) instances of the 'Keep Alive' 2464 // and 'Complete GC' closures. 2465 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2466 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2467 2468 // Parallel processing task executor. 2469 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2470 g1h->workers(), active_workers); 2471 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2472 2473 // Tell the parallel task terminator and the worker barrier syncs how 2474 // many workers to expect. 2475 set_phase(active_workers, concurrent()); 2476 2477 // Set the degree of MT processing here. If the discovery was done MT, 2478 // the number of threads involved during discovery could differ from 2479 // the number of active workers. This is OK as long as the discovered 2480 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2481 rp->set_active_mt_degree(active_workers); 2482 2483 // Process the weak references. 2484 rp->process_discovered_references(&g1_is_alive, 2485 &g1_keep_alive, 2486 &g1_drain_mark_stack, 2487 executor); 2488 2489 // The do_oop work routines of the keep_alive and drain_marking_stack 2490 // oop closures will set the has_overflown flag if we overflow the 2491 // global marking stack. 2492 2493 assert(_markStack.overflow() || _markStack.isEmpty(), 2494 "mark stack should be empty (unless it overflowed)"); 2495 2496 if (_markStack.overflow()) { 2497 // This should have been done already when we tried to push an 2498 // entry on to the global mark stack. But let's do it again. 2499 set_has_overflown(); 2500 } 2501 2502 assert(rp->num_q() == active_workers, "why not"); 2503 2504 rp->enqueue_discovered_references(executor); 2505 2506 rp->verify_no_references_recorded(); 2507 assert(!rp->discovery_enabled(), "Post condition"); 2508 } 2509 2510 // Now clean up stale oops in StringTable 2511 StringTable::unlink(&g1_is_alive); 2512 // Clean up unreferenced symbols in symbol table. 
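  // Unlike the string table, the symbol table holds no heap oops, so no
  // is-alive closure is needed here; unlink() simply drops entries whose
  // symbols are no longer referenced.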
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  bool            _is_serial;

public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing */,
                              true /* do_termination */,
                              _is_serial);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
    AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);

    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    uint active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    // Note - if there's no work gang then the VMThread will be
    // the thread to execute the remark - serially. We have
    // to pass true for the is_serial parameter so that
    // CMTask::do_marking_step() doesn't enter the synch
    // barriers in the event of an overflow. Doing so will
    // cause an assert that the current thread is not a
    // concurrent GC thread.
2594 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2595 remarkTask.work(0); 2596 } 2597 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2598 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2599 2600 print_stats(); 2601 2602 #if VERIFY_OBJS_PROCESSED 2603 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2604 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2605 _scan_obj_cl.objs_processed, 2606 ThreadLocalObjQueue::objs_enqueued); 2607 guarantee(_scan_obj_cl.objs_processed == 2608 ThreadLocalObjQueue::objs_enqueued, 2609 "Different number of objs processed and enqueued."); 2610 } 2611 #endif 2612 } 2613 2614 #ifndef PRODUCT 2615 2616 class PrintReachableOopClosure: public OopClosure { 2617 private: 2618 G1CollectedHeap* _g1h; 2619 outputStream* _out; 2620 VerifyOption _vo; 2621 bool _all; 2622 2623 public: 2624 PrintReachableOopClosure(outputStream* out, 2625 VerifyOption vo, 2626 bool all) : 2627 _g1h(G1CollectedHeap::heap()), 2628 _out(out), _vo(vo), _all(all) { } 2629 2630 void do_oop(narrowOop* p) { do_oop_work(p); } 2631 void do_oop( oop* p) { do_oop_work(p); } 2632 2633 template <class T> void do_oop_work(T* p) { 2634 oop obj = oopDesc::load_decode_heap_oop(p); 2635 const char* str = NULL; 2636 const char* str2 = ""; 2637 2638 if (obj == NULL) { 2639 str = ""; 2640 } else if (!_g1h->is_in_g1_reserved(obj)) { 2641 str = " O"; 2642 } else { 2643 HeapRegion* hr = _g1h->heap_region_containing(obj); 2644 guarantee(hr != NULL, "invariant"); 2645 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2646 bool marked = _g1h->is_marked(obj, _vo); 2647 2648 if (over_tams) { 2649 str = " >"; 2650 if (marked) { 2651 str2 = " AND MARKED"; 2652 } 2653 } else if (marked) { 2654 str = " M"; 2655 } else { 2656 str = " NOT"; 2657 } 2658 } 2659 2660 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2661 p, (void*) obj, str, str2); 2662 } 2663 }; 2664 2665 class PrintReachableObjectClosure : public ObjectClosure { 2666 private: 2667 G1CollectedHeap* _g1h; 2668 outputStream* _out; 2669 VerifyOption _vo; 2670 bool _all; 2671 HeapRegion* _hr; 2672 2673 public: 2674 PrintReachableObjectClosure(outputStream* out, 2675 VerifyOption vo, 2676 bool all, 2677 HeapRegion* hr) : 2678 _g1h(G1CollectedHeap::heap()), 2679 _out(out), _vo(vo), _all(all), _hr(hr) { } 2680 2681 void do_object(oop o) { 2682 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2683 bool marked = _g1h->is_marked(o, _vo); 2684 bool print_it = _all || over_tams || marked; 2685 2686 if (print_it) { 2687 _out->print_cr(" "PTR_FORMAT"%s", 2688 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2689 PrintReachableOopClosure oopCl(_out, _vo, _all); 2690 o->oop_iterate_no_header(&oopCl); 2691 } 2692 } 2693 }; 2694 2695 class PrintReachableRegionClosure : public HeapRegionClosure { 2696 private: 2697 G1CollectedHeap* _g1h; 2698 outputStream* _out; 2699 VerifyOption _vo; 2700 bool _all; 2701 2702 public: 2703 bool doHeapRegion(HeapRegion* hr) { 2704 HeapWord* b = hr->bottom(); 2705 HeapWord* e = hr->end(); 2706 HeapWord* t = hr->top(); 2707 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2708 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2709 "TAMS: "PTR_FORMAT, b, e, t, p); 2710 _out->cr(); 2711 2712 HeapWord* from = b; 2713 HeapWord* to = t; 2714 2715 if (to > from) { 2716 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2717 _out->cr(); 2718 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2719 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2720 _out->cr(); 2721 } 2722 2723 return false; 2724 } 2725 2726 PrintReachableRegionClosure(outputStream* out, 2727 VerifyOption vo, 2728 bool all) : 2729 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2730 }; 2731 2732 void ConcurrentMark::print_reachable(const char* str, 2733 VerifyOption vo, 2734 bool all) { 2735 gclog_or_tty->cr(); 2736 gclog_or_tty->print_cr("== Doing heap dump... "); 2737 2738 if (G1PrintReachableBaseFile == NULL) { 2739 gclog_or_tty->print_cr(" #### error: no base file defined"); 2740 return; 2741 } 2742 2743 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2744 (JVM_MAXPATHLEN - 1)) { 2745 gclog_or_tty->print_cr(" #### error: file name too long"); 2746 return; 2747 } 2748 2749 char file_name[JVM_MAXPATHLEN]; 2750 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2751 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2752 2753 fileStream fout(file_name); 2754 if (!fout.is_open()) { 2755 gclog_or_tty->print_cr(" #### error: could not open file"); 2756 return; 2757 } 2758 2759 outputStream* out = &fout; 2760 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2761 out->cr(); 2762 2763 out->print_cr("--- ITERATING OVER REGIONS"); 2764 out->cr(); 2765 PrintReachableRegionClosure rcl(out, vo, all); 2766 _g1h->heap_region_iterate(&rcl); 2767 out->cr(); 2768 2769 gclog_or_tty->print_cr(" done"); 2770 gclog_or_tty->flush(); 2771 } 2772 2773 #endif // PRODUCT 2774 2775 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2776 // Note we are overriding the read-only view of the prev map here, via 2777 // the cast. 2778 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2779 } 2780 2781 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2782 _nextMarkBitMap->clearRange(mr); 2783 } 2784 2785 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2786 clearRangePrevBitmap(mr); 2787 clearRangeNextBitmap(mr); 2788 } 2789 2790 HeapRegion* 2791 ConcurrentMark::claim_region(uint worker_id) { 2792 // "checkpoint" the finger 2793 HeapWord* finger = _finger; 2794 2795 // _heap_end will not change underneath our feet; it only changes at 2796 // yield points. 2797 while (finger < _heap_end) { 2798 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2799 2800 // Note on how this code handles humongous regions. In the 2801 // normal case the finger will reach the start of a "starts 2802 // humongous" (SH) region. 
    // Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord*   bottom      = curr_region->bottom();
    HeapWord*   end         = curr_region->end();
    HeapWord*   limit       = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             worker_id, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] we were successful with region = "
                               PTR_FORMAT, worker_id, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
                                 "returning it ", worker_id, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
                                 "returning NULL", worker_id, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
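        // Returning NULL simply sends the caller around its
        // claim_region() loop again: the CAS above has already advanced
        // the global finger past this empty region, so the retry will
        // examine the next region.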
2864 return NULL; 2865 } 2866 } else { 2867 assert(_finger > finger, "the finger should have moved forward"); 2868 if (verbose_low()) { 2869 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2870 "global finger = "PTR_FORMAT", " 2871 "our finger = "PTR_FORMAT, 2872 worker_id, _finger, finger); 2873 } 2874 2875 // read it again 2876 finger = _finger; 2877 } 2878 } 2879 2880 return NULL; 2881 } 2882 2883 #ifndef PRODUCT 2884 enum VerifyNoCSetOopsPhase { 2885 VerifyNoCSetOopsStack, 2886 VerifyNoCSetOopsQueues, 2887 VerifyNoCSetOopsSATBCompleted, 2888 VerifyNoCSetOopsSATBThread 2889 }; 2890 2891 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2892 private: 2893 G1CollectedHeap* _g1h; 2894 VerifyNoCSetOopsPhase _phase; 2895 int _info; 2896 2897 const char* phase_str() { 2898 switch (_phase) { 2899 case VerifyNoCSetOopsStack: return "Stack"; 2900 case VerifyNoCSetOopsQueues: return "Queue"; 2901 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2902 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2903 default: ShouldNotReachHere(); 2904 } 2905 return NULL; 2906 } 2907 2908 void do_object_work(oop obj) { 2909 guarantee(!_g1h->obj_in_cs(obj), 2910 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2911 (void*) obj, phase_str(), _info)); 2912 } 2913 2914 public: 2915 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2916 2917 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2918 _phase = phase; 2919 _info = info; 2920 } 2921 2922 virtual void do_oop(oop* p) { 2923 oop obj = oopDesc::load_decode_heap_oop(p); 2924 do_object_work(obj); 2925 } 2926 2927 virtual void do_oop(narrowOop* p) { 2928 // We should not come across narrow oops while scanning marking 2929 // stacks and SATB buffers. 2930 ShouldNotReachHere(); 2931 } 2932 2933 virtual void do_object(oop obj) { 2934 do_object_work(obj); 2935 } 2936 }; 2937 2938 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2939 bool verify_enqueued_buffers, 2940 bool verify_thread_buffers, 2941 bool verify_fingers) { 2942 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2943 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2944 return; 2945 } 2946 2947 VerifyNoCSetOopsClosure cl; 2948 2949 if (verify_stacks) { 2950 // Verify entries on the global mark stack 2951 cl.set_phase(VerifyNoCSetOopsStack); 2952 _markStack.oops_do(&cl); 2953 2954 // Verify entries on the task queues 2955 for (uint i = 0; i < _max_worker_id; i += 1) { 2956 cl.set_phase(VerifyNoCSetOopsQueues, i); 2957 CMTaskQueue* queue = _task_queues->queue(i); 2958 queue->oops_do(&cl); 2959 } 2960 } 2961 2962 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2963 2964 // Verify entries on the enqueued SATB buffers 2965 if (verify_enqueued_buffers) { 2966 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2967 satb_qs.iterate_completed_buffers_read_only(&cl); 2968 } 2969 2970 // Verify entries on the per-thread SATB buffers 2971 if (verify_thread_buffers) { 2972 cl.set_phase(VerifyNoCSetOopsSATBThread); 2973 satb_qs.iterate_thread_buffers_read_only(&cl); 2974 } 2975 2976 if (verify_fingers) { 2977 // Verify the global finger 2978 HeapWord* global_finger = finger(); 2979 if (global_finger != NULL && global_finger < _heap_end) { 2980 // The global finger always points to a heap region boundary. 
We 2981 // use heap_region_containing_raw() to get the containing region 2982 // given that the global finger could be pointing to a free region 2983 // which subsequently becomes continues humongous. If that 2984 // happens, heap_region_containing() will return the bottom of the 2985 // corresponding starts humongous region and the check below will 2986 // not hold any more. 2987 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2988 guarantee(global_finger == global_hr->bottom(), 2989 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2990 global_finger, HR_FORMAT_PARAMS(global_hr))); 2991 } 2992 2993 // Verify the task fingers 2994 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2995 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2996 CMTask* task = _tasks[i]; 2997 HeapWord* task_finger = task->finger(); 2998 if (task_finger != NULL && task_finger < _heap_end) { 2999 // See above note on the global finger verification. 3000 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3001 guarantee(task_finger == task_hr->bottom() || 3002 !task_hr->in_collection_set(), 3003 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3004 task_finger, HR_FORMAT_PARAMS(task_hr))); 3005 } 3006 } 3007 } 3008 } 3009 #endif // PRODUCT 3010 3011 // Aggregate the counting data that was constructed concurrently 3012 // with marking. 3013 class AggregateCountDataHRClosure: public HeapRegionClosure { 3014 G1CollectedHeap* _g1h; 3015 ConcurrentMark* _cm; 3016 CardTableModRefBS* _ct_bs; 3017 BitMap* _cm_card_bm; 3018 uint _max_worker_id; 3019 3020 public: 3021 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3022 BitMap* cm_card_bm, 3023 uint max_worker_id) : 3024 _g1h(g1h), _cm(g1h->concurrent_mark()), 3025 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3026 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3027 3028 bool doHeapRegion(HeapRegion* hr) { 3029 if (hr->continuesHumongous()) { 3030 // We will ignore these here and process them when their 3031 // associated "starts humongous" region is processed. 3032 // Note that we cannot rely on their associated 3033 // "starts humongous" region to have their bit set to 1 3034 // since, due to the region chunking in the parallel region 3035 // iteration, a "continues humongous" region might be visited 3036 // before its associated "starts humongous". 3037 return false; 3038 } 3039 3040 HeapWord* start = hr->bottom(); 3041 HeapWord* limit = hr->next_top_at_mark_start(); 3042 HeapWord* end = hr->end(); 3043 3044 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3045 err_msg("Preconditions not met - " 3046 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3047 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3048 start, limit, hr->top(), hr->end())); 3049 3050 assert(hr->next_marked_bytes() == 0, "Precondition"); 3051 3052 if (start == limit) { 3053 // NTAMS of this region has not been set so nothing to do. 3054 return false; 3055 } 3056 3057 // 'start' should be in the heap. 
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could be actually just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrs_index = hr->hrs_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrs_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
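    // Each worker kept its own "stripe" of the counting data during
    // marking: a per-worker marked_bytes array indexed by region plus a
    // private card bitmap. The loop above sums the byte counts and ORs
    // the per-worker card bits into the single global card bitmap.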
3111 hr->add_to_marked_bytes(marked_bytes); 3112 3113 // Next heap region 3114 return false; 3115 } 3116 }; 3117 3118 class G1AggregateCountDataTask: public AbstractGangTask { 3119 protected: 3120 G1CollectedHeap* _g1h; 3121 ConcurrentMark* _cm; 3122 BitMap* _cm_card_bm; 3123 uint _max_worker_id; 3124 int _active_workers; 3125 3126 public: 3127 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3128 ConcurrentMark* cm, 3129 BitMap* cm_card_bm, 3130 uint max_worker_id, 3131 int n_workers) : 3132 AbstractGangTask("Count Aggregation"), 3133 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3134 _max_worker_id(max_worker_id), 3135 _active_workers(n_workers) { } 3136 3137 void work(uint worker_id) { 3138 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3139 3140 if (G1CollectedHeap::use_parallel_gc_threads()) { 3141 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3142 _active_workers, 3143 HeapRegion::AggregateCountClaimValue); 3144 } else { 3145 _g1h->heap_region_iterate(&cl); 3146 } 3147 } 3148 }; 3149 3150 3151 void ConcurrentMark::aggregate_count_data() { 3152 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3153 _g1h->workers()->active_workers() : 3154 1); 3155 3156 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3157 _max_worker_id, n_workers); 3158 3159 if (G1CollectedHeap::use_parallel_gc_threads()) { 3160 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3161 "sanity check"); 3162 _g1h->set_par_threads(n_workers); 3163 _g1h->workers()->run_task(&g1_par_agg_task); 3164 _g1h->set_par_threads(0); 3165 3166 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3167 "sanity check"); 3168 _g1h->reset_heap_region_claim_values(); 3169 } else { 3170 g1_par_agg_task.work(0); 3171 } 3172 } 3173 3174 // Clear the per-worker arrays used to store the per-region counting data 3175 void ConcurrentMark::clear_all_count_data() { 3176 // Clear the global card bitmap - it will be filled during 3177 // liveness count aggregation (during remark) and the 3178 // final counting task. 3179 _card_bm.clear(); 3180 3181 // Clear the global region bitmap - it will be filled as part 3182 // of the final counting task. 
3183 _region_bm.clear(); 3184 3185 uint max_regions = _g1h->max_regions(); 3186 assert(_max_worker_id > 0, "uninitialized"); 3187 3188 for (uint i = 0; i < _max_worker_id; i += 1) { 3189 BitMap* task_card_bm = count_card_bitmap_for(i); 3190 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3191 3192 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3193 assert(marked_bytes_array != NULL, "uninitialized"); 3194 3195 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3196 task_card_bm->clear(); 3197 } 3198 } 3199 3200 void ConcurrentMark::print_stats() { 3201 if (verbose_stats()) { 3202 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3203 for (size_t i = 0; i < _active_tasks; ++i) { 3204 _tasks[i]->print_stats(); 3205 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3206 } 3207 } 3208 } 3209 3210 // abandon current marking iteration due to a Full GC 3211 void ConcurrentMark::abort() { 3212 // Clear all marks to force marking thread to do nothing 3213 _nextMarkBitMap->clearAll(); 3214 // Clear the liveness counting data 3215 clear_all_count_data(); 3216 // Empty mark stack 3217 reset_marking_state(); 3218 for (uint i = 0; i < _max_worker_id; ++i) { 3219 _tasks[i]->clear_region_fields(); 3220 } 3221 _has_aborted = true; 3222 3223 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3224 satb_mq_set.abandon_partial_marking(); 3225 // This can be called either during or outside marking, we'll read 3226 // the expected_active value from the SATB queue set. 3227 satb_mq_set.set_active_all_threads( 3228 false, /* new active value */ 3229 satb_mq_set.is_active() /* expected_active */); 3230 } 3231 3232 static void print_ms_time_info(const char* prefix, const char* name, 3233 NumberSeq& ns) { 3234 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3235 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3236 if (ns.num() > 0) { 3237 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3238 prefix, ns.sd(), ns.maximum()); 3239 } 3240 } 3241 3242 void ConcurrentMark::print_summary_info() { 3243 gclog_or_tty->print_cr(" Concurrent marking:"); 3244 print_ms_time_info(" ", "init marks", _init_times); 3245 print_ms_time_info(" ", "remarks", _remark_times); 3246 { 3247 print_ms_time_info(" ", "final marks", _remark_mark_times); 3248 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3249 3250 } 3251 print_ms_time_info(" ", "cleanups", _cleanup_times); 3252 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3253 _total_counting_time, 3254 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3255 (double)_cleanup_times.num() 3256 : 0.0)); 3257 if (G1ScrubRemSets) { 3258 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3259 _total_rs_scrub_time, 3260 (_cleanup_times.num() > 0 ? 
_total_rs_scrub_time * 1000.0 / 3261 (double)_cleanup_times.num() 3262 : 0.0)); 3263 } 3264 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3265 (_init_times.sum() + _remark_times.sum() + 3266 _cleanup_times.sum())/1000.0); 3267 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3268 "(%8.2f s marking).", 3269 cmThread()->vtime_accum(), 3270 cmThread()->vtime_mark_accum()); 3271 } 3272 3273 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3274 if (use_parallel_marking_threads()) { 3275 _parallel_workers->print_worker_threads_on(st); 3276 } 3277 } 3278 3279 // We take a break if someone is trying to stop the world. 3280 bool ConcurrentMark::do_yield_check(uint worker_id) { 3281 if (should_yield()) { 3282 if (worker_id == 0) { 3283 _g1h->g1_policy()->record_concurrent_pause(); 3284 } 3285 cmThread()->yield(); 3286 return true; 3287 } else { 3288 return false; 3289 } 3290 } 3291 3292 bool ConcurrentMark::should_yield() { 3293 return cmThread()->should_yield(); 3294 } 3295 3296 bool ConcurrentMark::containing_card_is_marked(void* p) { 3297 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3298 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3299 } 3300 3301 bool ConcurrentMark::containing_cards_are_marked(void* start, 3302 void* last) { 3303 return containing_card_is_marked(start) && 3304 containing_card_is_marked(last); 3305 } 3306 3307 #ifndef PRODUCT 3308 // for debugging purposes 3309 void ConcurrentMark::print_finger() { 3310 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3311 _heap_start, _heap_end, _finger); 3312 for (uint i = 0; i < _max_worker_id; ++i) { 3313 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger()); 3314 } 3315 gclog_or_tty->print_cr(""); 3316 } 3317 #endif 3318 3319 void CMTask::scan_object(oop obj) { 3320 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3321 3322 if (_cm->verbose_high()) { 3323 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3324 _worker_id, (void*) obj); 3325 } 3326 3327 size_t obj_size = obj->size(); 3328 _words_scanned += obj_size; 3329 3330 obj->oop_iterate(_cm_oop_closure); 3331 statsOnly( ++_objs_scanned ); 3332 check_limits(); 3333 } 3334 3335 // Closure for iteration over bitmaps 3336 class CMBitMapClosure : public BitMapClosure { 3337 private: 3338 // the bitmap that is being iterated over 3339 CMBitMap* _nextMarkBitMap; 3340 ConcurrentMark* _cm; 3341 CMTask* _task; 3342 3343 public: 3344 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3345 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3346 3347 bool do_bit(size_t offset) { 3348 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3349 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3350 assert( addr < _cm->finger(), "invariant"); 3351 3352 statsOnly( _task->increase_objs_found_on_bitmap() ); 3353 assert(addr >= _task->finger(), "invariant"); 3354 3355 // We move that task's local finger along. 3356 _task->move_finger_to(addr); 3357 3358 _task->scan_object(oop(addr)); 3359 // we only partially drain the local queue and global stack 3360 _task->drain_local_queue(true); 3361 _task->drain_global_stack(true); 3362 3363 // if the has_aborted flag has been raised, we need to bail out of 3364 // the iteration 3365 return !_task->has_aborted(); 3366 } 3367 }; 3368 3369 // Closure for iterating over objects, currently only used for 3370 // processing SATB buffers. 
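// A sketch of the intended use, mirroring what drain_satb_buffers()
// does further down (serial variant shown; names are the ones used in
// that method):
//
//   CMObjectClosure oc(task);
//   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
//   satb_mq_set.set_closure(&oc);
//   while (satb_mq_set.apply_closure_to_completed_buffer()) {
//     // every object recorded in each completed buffer is passed to
//     // oc.do_object(), which forwards it to CMTask::deal_with_reference()
//   }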
3371 class CMObjectClosure : public ObjectClosure {
3372 private:
3373   CMTask* _task;
3374 
3375 public:
3376   void do_object(oop obj) {
3377     _task->deal_with_reference(obj);
3378   }
3379 
3380   CMObjectClosure(CMTask* task) : _task(task) { }
3381 };
3382 
3383 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3384                                ConcurrentMark* cm,
3385                                CMTask* task)
3386   : _g1h(g1h), _cm(cm), _task(task) {
3387   assert(_ref_processor == NULL, "should be initialized to NULL");
3388 
3389   if (G1UseConcMarkReferenceProcessing) {
3390     _ref_processor = g1h->ref_processor_cm();
3391     assert(_ref_processor != NULL, "should not be NULL");
3392   }
3393 }
3394 
3395 void CMTask::setup_for_region(HeapRegion* hr) {
3396   // Separated the asserts so that we know which one fires.
3397   assert(hr != NULL,
3398         "claim_region() should have filtered out NULL regions");
3399   assert(!hr->continuesHumongous(),
3400         "claim_region() should have filtered out continues humongous regions");
3401 
3402   if (_cm->verbose_low()) {
3403     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3404                            _worker_id, hr);
3405   }
3406 
3407   _curr_region = hr;
3408   _finger = hr->bottom();
3409   update_region_limit();
3410 }
3411 
3412 void CMTask::update_region_limit() {
3413   HeapRegion* hr = _curr_region;
3414   HeapWord* bottom = hr->bottom();
3415   HeapWord* limit = hr->next_top_at_mark_start();
3416 
3417   if (limit == bottom) {
3418     if (_cm->verbose_low()) {
3419       gclog_or_tty->print_cr("[%u] found an empty region "
3420                              "["PTR_FORMAT", "PTR_FORMAT")",
3421                              _worker_id, bottom, limit);
3422     }
3423     // The region was collected underneath our feet.
3424     // We set the finger to bottom to ensure that the bitmap
3425     // iteration that will follow this will not do anything.
3426     // (this is not a condition that holds when we set the region up,
3427     // as the region is not supposed to be empty in the first place)
3428     _finger = bottom;
3429   } else if (limit >= _region_limit) {
3430     assert(limit >= _finger, "peace of mind");
3431   } else {
3432     assert(limit < _region_limit, "only way to get here");
3433     // This can happen under some pretty unusual circumstances. An
3434     // evacuation pause empties the region underneath our feet (NTAMS
3435     // at bottom). We then do some allocation in the region (NTAMS
3436     // stays at bottom), followed by the region being used as a GC
3437     // alloc region (NTAMS will move to top() and the objects
3438     // originally below it will be grayed). All objects now marked in
3439     // the region are explicitly grayed, if below the global finger,
3440     // and in fact we do not need to scan anything else. So, we simply
3441     // set _finger to be limit to ensure that the bitmap iteration
3442     // doesn't do anything.
3443     _finger = limit;
3444   }
3445 
3446   _region_limit = limit;
3447 }
3448 
3449 void CMTask::giveup_current_region() {
3450   assert(_curr_region != NULL, "invariant");
3451   if (_cm->verbose_low()) {
3452     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3453                            _worker_id, _curr_region);
3454   }
3455   clear_region_fields();
3456 }
3457 
3458 void CMTask::clear_region_fields() {
3459   // Values for these three fields that indicate that we're not
3460   // holding on to a region.
3461 _curr_region = NULL; 3462 _finger = NULL; 3463 _region_limit = NULL; 3464 } 3465 3466 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3467 if (cm_oop_closure == NULL) { 3468 assert(_cm_oop_closure != NULL, "invariant"); 3469 } else { 3470 assert(_cm_oop_closure == NULL, "invariant"); 3471 } 3472 _cm_oop_closure = cm_oop_closure; 3473 } 3474 3475 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3476 guarantee(nextMarkBitMap != NULL, "invariant"); 3477 3478 if (_cm->verbose_low()) { 3479 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3480 } 3481 3482 _nextMarkBitMap = nextMarkBitMap; 3483 clear_region_fields(); 3484 3485 _calls = 0; 3486 _elapsed_time_ms = 0.0; 3487 _termination_time_ms = 0.0; 3488 _termination_start_time_ms = 0.0; 3489 3490 #if _MARKING_STATS_ 3491 _local_pushes = 0; 3492 _local_pops = 0; 3493 _local_max_size = 0; 3494 _objs_scanned = 0; 3495 _global_pushes = 0; 3496 _global_pops = 0; 3497 _global_max_size = 0; 3498 _global_transfers_to = 0; 3499 _global_transfers_from = 0; 3500 _regions_claimed = 0; 3501 _objs_found_on_bitmap = 0; 3502 _satb_buffers_processed = 0; 3503 _steal_attempts = 0; 3504 _steals = 0; 3505 _aborted = 0; 3506 _aborted_overflow = 0; 3507 _aborted_cm_aborted = 0; 3508 _aborted_yield = 0; 3509 _aborted_timed_out = 0; 3510 _aborted_satb = 0; 3511 _aborted_termination = 0; 3512 #endif // _MARKING_STATS_ 3513 } 3514 3515 bool CMTask::should_exit_termination() { 3516 regular_clock_call(); 3517 // This is called when we are in the termination protocol. We should 3518 // quit if, for some reason, this task wants to abort or the global 3519 // stack is not empty (this means that we can get work from it). 3520 return !_cm->mark_stack_empty() || has_aborted(); 3521 } 3522 3523 void CMTask::reached_limit() { 3524 assert(_words_scanned >= _words_scanned_limit || 3525 _refs_reached >= _refs_reached_limit , 3526 "shouldn't have been called otherwise"); 3527 regular_clock_call(); 3528 } 3529 3530 void CMTask::regular_clock_call() { 3531 if (has_aborted()) return; 3532 3533 // First, we need to recalculate the words scanned and refs reached 3534 // limits for the next clock call. 3535 recalculate_limits(); 3536 3537 // During the regular clock call we do the following 3538 3539 // (1) If an overflow has been flagged, then we abort. 3540 if (_cm->has_overflown()) { 3541 set_has_aborted(); 3542 return; 3543 } 3544 3545 // If we are not concurrent (i.e. we're doing remark) we don't need 3546 // to check anything else. The other steps are only needed during 3547 // the concurrent marking phase. 3548 if (!concurrent()) return; 3549 3550 // (2) If marking has been aborted for Full GC, then we also abort. 3551 if (_cm->has_aborted()) { 3552 set_has_aborted(); 3553 statsOnly( ++_aborted_cm_aborted ); 3554 return; 3555 } 3556 3557 double curr_time_ms = os::elapsedVTime() * 1000.0; 3558 3559 // (3) If marking stats are enabled, then we update the step history. 
3560 #if _MARKING_STATS_
3561   if (_words_scanned >= _words_scanned_limit) {
3562     ++_clock_due_to_scanning;
3563   }
3564   if (_refs_reached >= _refs_reached_limit) {
3565     ++_clock_due_to_marking;
3566   }
3567 
3568   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3569   _interval_start_time_ms = curr_time_ms;
3570   _all_clock_intervals_ms.add(last_interval_ms);
3571 
3572   if (_cm->verbose_medium()) {
3573     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3574                            "scanned = %d%s, refs reached = %d%s",
3575                            _worker_id, last_interval_ms,
3576                            _words_scanned,
3577                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3578                            _refs_reached,
3579                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3580   }
3581 #endif // _MARKING_STATS_
3582 
3583   // (4) We check whether we should yield. If we have to, then we abort.
3584   if (_cm->should_yield()) {
3585     // We should yield. To do this we abort the task. The caller is
3586     // responsible for yielding.
3587     set_has_aborted();
3588     statsOnly( ++_aborted_yield );
3589     return;
3590   }
3591 
3592   // (5) We check whether we've reached our time quota. If we have,
3593   // then we abort.
3594   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3595   if (elapsed_time_ms > _time_target_ms) {
3596     set_has_aborted();
3597     _has_timed_out = true;
3598     statsOnly( ++_aborted_timed_out );
3599     return;
3600   }
3601 
3602   // (6) Finally, we check whether there are enough completed SATB
3603   // buffers available for processing. If there are, we abort.
3604   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3605   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3606     if (_cm->verbose_low()) {
3607       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3608                              _worker_id);
3609     }
3610     // We do need to process SATB buffers, so we'll abort and restart
3611     // the marking task to do so.
3612     set_has_aborted();
3613     statsOnly( ++_aborted_satb );
3614     return;
3615   }
3616 }
3617 
3618 void CMTask::recalculate_limits() {
3619   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3620   _words_scanned_limit = _real_words_scanned_limit;
3621 
3622   _real_refs_reached_limit = _refs_reached + refs_reached_period;
3623   _refs_reached_limit = _real_refs_reached_limit;
3624 }
3625 
3626 void CMTask::decrease_limits() {
3627   // This is called when we believe that we're going to do an infrequent
3628   // operation which will increase the per-byte scanned cost (i.e. move
3629   // entries to/from the global stack). It basically tries to decrease the
3630   // scanning limit so that the clock is called earlier.
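// Worked example (figures illustrative): recalculate_limits() sets
//   _words_scanned_limit = _words_scanned + words_scanned_period.
// Assuming little has been scanned since that call, after the
// adjustment below the next clock call becomes due after only
// words_scanned_period / 4 further words -- a quarter of the usual
// interval. The same reasoning applies to the refs limit.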
3631 3632 if (_cm->verbose_medium()) { 3633 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id); 3634 } 3635 3636 _words_scanned_limit = _real_words_scanned_limit - 3637 3 * words_scanned_period / 4; 3638 _refs_reached_limit = _real_refs_reached_limit - 3639 3 * refs_reached_period / 4; 3640 } 3641 3642 void CMTask::move_entries_to_global_stack() { 3643 // local array where we'll store the entries that will be popped 3644 // from the local queue 3645 oop buffer[global_stack_transfer_size]; 3646 3647 int n = 0; 3648 oop obj; 3649 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3650 buffer[n] = obj; 3651 ++n; 3652 } 3653 3654 if (n > 0) { 3655 // we popped at least one entry from the local queue 3656 3657 statsOnly( ++_global_transfers_to; _local_pops += n ); 3658 3659 if (!_cm->mark_stack_push(buffer, n)) { 3660 if (_cm->verbose_low()) { 3661 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow", 3662 _worker_id); 3663 } 3664 set_has_aborted(); 3665 } else { 3666 // the transfer was successful 3667 3668 if (_cm->verbose_medium()) { 3669 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack", 3670 _worker_id, n); 3671 } 3672 statsOnly( int tmp_size = _cm->mark_stack_size(); 3673 if (tmp_size > _global_max_size) { 3674 _global_max_size = tmp_size; 3675 } 3676 _global_pushes += n ); 3677 } 3678 } 3679 3680 // this operation was quite expensive, so decrease the limits 3681 decrease_limits(); 3682 } 3683 3684 void CMTask::get_entries_from_global_stack() { 3685 // local array where we'll store the entries that will be popped 3686 // from the global stack. 3687 oop buffer[global_stack_transfer_size]; 3688 int n; 3689 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3690 assert(n <= global_stack_transfer_size, 3691 "we should not pop more than the given limit"); 3692 if (n > 0) { 3693 // yes, we did actually pop at least one entry 3694 3695 statsOnly( ++_global_transfers_from; _global_pops += n ); 3696 if (_cm->verbose_medium()) { 3697 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack", 3698 _worker_id, n); 3699 } 3700 for (int i = 0; i < n; ++i) { 3701 bool success = _task_queue->push(buffer[i]); 3702 // We only call this when the local queue is empty or under a 3703 // given target limit. So, we do not expect this push to fail. 3704 assert(success, "invariant"); 3705 } 3706 3707 statsOnly( int tmp_size = _task_queue->size(); 3708 if (tmp_size > _local_max_size) { 3709 _local_max_size = tmp_size; 3710 } 3711 _local_pushes += n ); 3712 } 3713 3714 // this operation was quite expensive, so decrease the limits 3715 decrease_limits(); 3716 } 3717 3718 void CMTask::drain_local_queue(bool partially) { 3719 if (has_aborted()) return; 3720 3721 // Decide what the target size is, depending whether we're going to 3722 // drain it partially (so that other tasks can steal if they run out 3723 // of things to do) or totally (at the very end). 
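// For instance (values illustrative; 64 is believed to be the default
// of GCDrainStackTargetSize): with a queue capacity of 16K entries, a
// partial drain stops once the queue is down to MIN2(16384/3, 64) = 64
// entries, leaving some work around for other tasks to steal.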
3724   size_t target_size;
3725   if (partially) {
3726     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3727   } else {
3728     target_size = 0;
3729   }
3730 
3731   if (_task_queue->size() > target_size) {
3732     if (_cm->verbose_high()) {
3733       gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3734                              _worker_id, target_size);
3735     }
3736 
3737     oop obj;
3738     bool ret = _task_queue->pop_local(obj);
3739     while (ret) {
3740       statsOnly( ++_local_pops );
3741 
3742       if (_cm->verbose_high()) {
3743         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3744                                (void*) obj);
3745       }
3746 
3747       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3748       assert(!_g1h->is_on_master_free_list(
3749                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3750 
3751       scan_object(obj);
3752 
3753       if (_task_queue->size() <= target_size || has_aborted()) {
3754         ret = false;
3755       } else {
3756         ret = _task_queue->pop_local(obj);
3757       }
3758     }
3759 
3760     if (_cm->verbose_high()) {
3761       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3762                              _worker_id, _task_queue->size());
3763     }
3764   }
3765 }
3766 
3767 void CMTask::drain_global_stack(bool partially) {
3768   if (has_aborted()) return;
3769 
3770   // We have a policy to drain the local queue before we attempt to
3771   // drain the global stack.
3772   assert(partially || _task_queue->size() == 0, "invariant");
3773 
3774   // Decide what the target size is, depending whether we're going to
3775   // drain it partially (so that other tasks can steal if they run out
3776   // of things to do) or totally (at the very end). Notice that,
3777   // because we move entries from the global stack in chunks or
3778   // because another task might be doing the same, we might in fact
3779   // drop below the target. But, this is not a problem.
3780   size_t target_size;
3781   if (partially) {
3782     target_size = _cm->partial_mark_stack_size_target();
3783   } else {
3784     target_size = 0;
3785   }
3786 
3787   if (_cm->mark_stack_size() > target_size) {
3788     if (_cm->verbose_low()) {
3789       gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3790                              _worker_id, target_size);
3791     }
3792 
3793     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3794       get_entries_from_global_stack();
3795       drain_local_queue(partially);
3796     }
3797 
3798     if (_cm->verbose_low()) {
3799       gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3800                              _worker_id, _cm->mark_stack_size());
3801     }
3802   }
3803 }
3804 
3805 // The SATB queue set makes several assumptions about whether the par
3806 // or non-par versions of its methods are called. This is why some of
3807 // the code is replicated. We should really get rid of the
3808 // single-threaded version of the code to simplify things.
3809 void CMTask::drain_satb_buffers() {
3810   if (has_aborted()) return;
3811 
3812   // We set this so that the regular clock knows that we're in the
3813   // middle of draining buffers and doesn't set the abort flag when it
3814   // notices that SATB buffers are available for draining. It'd be
3815   // very counterproductive if it did that. :-)
3816   _draining_satb_buffers = true;
3817 
3818   CMObjectClosure oc(this);
3819   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3820   if (G1CollectedHeap::use_parallel_gc_threads()) {
3821     satb_mq_set.set_par_closure(_worker_id, &oc);
3822   } else {
3823     satb_mq_set.set_closure(&oc);
3824   }
3825 
3826   // This keeps claiming and applying the closure to completed buffers
3827   // until we run out of buffers or we need to abort.
3828 if (G1CollectedHeap::use_parallel_gc_threads()) { 3829 while (!has_aborted() && 3830 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3831 if (_cm->verbose_medium()) { 3832 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3833 } 3834 statsOnly( ++_satb_buffers_processed ); 3835 regular_clock_call(); 3836 } 3837 } else { 3838 while (!has_aborted() && 3839 satb_mq_set.apply_closure_to_completed_buffer()) { 3840 if (_cm->verbose_medium()) { 3841 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3842 } 3843 statsOnly( ++_satb_buffers_processed ); 3844 regular_clock_call(); 3845 } 3846 } 3847 3848 if (!concurrent() && !has_aborted()) { 3849 // We should only do this during remark. 3850 if (G1CollectedHeap::use_parallel_gc_threads()) { 3851 satb_mq_set.par_iterate_closure_all_threads(_worker_id); 3852 } else { 3853 satb_mq_set.iterate_closure_all_threads(); 3854 } 3855 } 3856 3857 _draining_satb_buffers = false; 3858 3859 assert(has_aborted() || 3860 concurrent() || 3861 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3862 3863 if (G1CollectedHeap::use_parallel_gc_threads()) { 3864 satb_mq_set.set_par_closure(_worker_id, NULL); 3865 } else { 3866 satb_mq_set.set_closure(NULL); 3867 } 3868 3869 // again, this was a potentially expensive operation, decrease the 3870 // limits to get the regular clock call early 3871 decrease_limits(); 3872 } 3873 3874 void CMTask::print_stats() { 3875 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3876 _worker_id, _calls); 3877 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3878 _elapsed_time_ms, _termination_time_ms); 3879 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3880 _step_times_ms.num(), _step_times_ms.avg(), 3881 _step_times_ms.sd()); 3882 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3883 _step_times_ms.maximum(), _step_times_ms.sum()); 3884 3885 #if _MARKING_STATS_ 3886 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3887 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3888 _all_clock_intervals_ms.sd()); 3889 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3890 _all_clock_intervals_ms.maximum(), 3891 _all_clock_intervals_ms.sum()); 3892 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3893 _clock_due_to_scanning, _clock_due_to_marking); 3894 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3895 _objs_scanned, _objs_found_on_bitmap); 3896 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3897 _local_pushes, _local_pops, _local_max_size); 3898 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3899 _global_pushes, _global_pops, _global_max_size); 3900 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3901 _global_transfers_to,_global_transfers_from); 3902 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3903 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3904 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3905 _steal_attempts, _steals); 3906 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3907 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3908 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3909 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3910 _aborted_timed_out, 
                         _aborted_satb, _aborted_termination);
3911 #endif // _MARKING_STATS_
3912 }
3913 
3914 /*****************************************************************************
3915 
3916     The do_marking_step(time_target_ms, ...) method is the building
3917     block of the parallel marking framework. It can be called in parallel
3918     with other invocations of do_marking_step() on different tasks
3919     (but only one per task, obviously) and concurrently with the
3920     mutator threads, or during remark, hence eliminating the need
3921     for two versions of the code. When called during remark, it will
3922     pick up from where the task left off during the concurrent marking
3923     phase. Interestingly, tasks are also claimable during evacuation
3924     pauses, since do_marking_step() ensures that it aborts before
3925     it needs to yield.
3926 
3927     The data structures that it uses to do marking work are the
3928     following:
3929 
3930       (1) Marking Bitmap. If there are gray objects that appear only
3931       on the bitmap (this happens either when dealing with an overflow
3932       or when the initial marking phase has simply marked the roots
3933       and didn't push them on the stack), then tasks claim heap
3934       regions whose bitmap they then scan to find gray objects. A
3935       global finger indicates where the end of the last claimed region
3936       is. A local finger indicates how far into the region a task has
3937       scanned. The two fingers are used to determine how to gray an
3938       object (i.e. whether simply marking it is OK, as it will be
3939       visited by a task in the future, or whether it also needs to be
3940       pushed on a stack).
3941 
3942       (2) Local Queue. The local queue of the task, which is accessed
3943       reasonably efficiently by the task. Other tasks can steal from
3944       it when they run out of work. Throughout the marking phase, a
3945       task attempts to keep its local queue short but not totally
3946       empty, so that entries are available for stealing by other
3947       tasks. Only when there is no more work will a task totally
3948       drain its local queue.
3949 
3950       (3) Global Mark Stack. This handles local queue overflow. During
3951       marking only sets of entries are moved between it and the local
3952       queues, as access to it requires a mutex, and finer-grained
3953       interaction with it might cause contention. If it
3954       overflows, then the marking phase should restart and iterate
3955       over the bitmap to identify gray objects. Throughout the marking
3956       phase, tasks attempt to keep the global mark stack at a small
3957       length but not totally empty, so that entries are available for
3958       popping by other tasks. Only when there is no more work will
3959       tasks totally drain the global mark stack.
3960 
3961       (4) SATB Buffer Queue. This is where completed SATB buffers are
3962       made available. Buffers are regularly removed from this queue
3963       and scanned for roots, so that the queue doesn't get too
3964       long. During remark, all completed buffers are processed, as
3965       well as the filled-in parts of any uncompleted buffers.
3966 
3967     The do_marking_step() method tries to abort when the time target
3968     has been reached. There are a few other cases when the
3969     do_marking_step() method also aborts:
3970 
3971       (1) When the marking phase has been aborted (after a Full GC).
3972 
3973       (2) When a global overflow (on the global stack) has been
3974       triggered. Before the task aborts, it will actually sync up with
3975       the other tasks to ensure that all the marking data structures
3976       (local queues, stacks, fingers etc.) are re-initialised so that
3977       when do_marking_step() completes, the marking phase can
3978       immediately restart.
3979 
3980       (3) When enough completed SATB buffers are available. The
3981       do_marking_step() method only tries to drain SATB buffers right
3982       at the beginning. So, if enough buffers are available, the
3983       marking step aborts and the SATB buffers are processed at
3984       the beginning of the next invocation.
3985 
3986       (4) To yield. When we have to yield, we abort and do the
3987       yielding right at the end of do_marking_step(). This saves us a
3988       lot of hassle, as by yielding we might allow a Full GC. If this
3989       happens then objects will be compacted underneath our feet, the
3990       heap might shrink, etc. We save checking for this by just
3991       aborting and doing the yield right at the end.
3992 
3993     From the above it follows that the do_marking_step() method should
3994     be called in a loop (or, otherwise, regularly) until it completes.
3995 
3996     If a marking step completes without its has_aborted() flag being
3997     true, it means it has completed the current marking phase (and
3998     also all other marking tasks have done so and have all synced up).
3999 
4000     A method called regular_clock_call() is invoked "regularly" (in
4001     sub-ms intervals) throughout marking. It is this clock method that
4002     checks all the abort conditions which were mentioned above and
4003     decides when the task should abort. A work-based scheme is used to
4004     trigger this clock method: when the number of object words the
4005     marking phase has scanned or the number of references the marking
4006     phase has visited reaches a given limit. Additional calls to
4007     the clock method have been planted in a few other strategic places
4008     too. The initial reason for the clock method was to avoid calling
4009     vtime too regularly, as it is quite expensive. So, once it was in
4010     place, it was natural to piggy-back all the other conditions on it
4011     too and not constantly check them throughout the code.
4012 
4013     If do_stealing is true then do_marking_step will attempt to steal
4014     work from the other CMTasks. It only makes sense to enable
4015     stealing when being called by multiple threads.
4016 
4017     If do_termination is true then do_marking_step will enter its
4018     termination protocol.
4019 
4020     The value of is_serial should be true when do_marking_step is
4021     being called by the serial reference processing closures.
4022     In this case the calling thread is the VM thread and do_marking_step
4023     should skip any synchronization in the termination and overflow
4024     code.
4025 
4026  *****************************************************************************/
4027 
4028 void CMTask::do_marking_step(double time_target_ms,
4029                              bool do_stealing,
4030                              bool do_termination,
4031                              bool is_serial) {
4032   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4033   assert(concurrent() == _cm->concurrent(), "they should be the same");
4034 
4035   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4036   assert(_task_queues != NULL, "invariant");
4037   assert(_task_queue != NULL, "invariant");
4038   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4039 
4040   assert(!_claimed,
4041          "only one thread should claim this task at any one time");
4042 
4043   // OK, this doesn't safeguard against all possible scenarios, as it is
4044   // possible for two threads to set the _claimed flag at the same
4045   // time. But it is only for debugging purposes anyway and it will
4046   // catch most problems.
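// A watertight claim would need an atomic update rather than a plain
// store. A minimal sketch (illustrative only -- '_claimed_word' is a
// hypothetical jint shadow of the flag, not a field of CMTask):
//
//   jint res = Atomic::cmpxchg(1, &_claimed_word, 0);
//   guarantee(res == 0, "task claimed by two threads");
//
// Since _claimed is only a debugging aid, the plain store below is
// considered good enough.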
4047 _claimed = true; 4048 4049 _start_time_ms = os::elapsedVTime() * 1000.0; 4050 statsOnly( _interval_start_time_ms = _start_time_ms ); 4051 4052 double diff_prediction_ms = 4053 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 4054 _time_target_ms = time_target_ms - diff_prediction_ms; 4055 4056 // set up the variables that are used in the work-based scheme to 4057 // call the regular clock method 4058 _words_scanned = 0; 4059 _refs_reached = 0; 4060 recalculate_limits(); 4061 4062 // clear all flags 4063 clear_has_aborted(); 4064 _has_timed_out = false; 4065 _draining_satb_buffers = false; 4066 4067 ++_calls; 4068 4069 if (_cm->verbose_low()) { 4070 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, " 4071 "target = %1.2lfms >>>>>>>>>>", 4072 _worker_id, _calls, _time_target_ms); 4073 } 4074 4075 // Set up the bitmap and oop closures. Anything that uses them is 4076 // eventually called from this method, so it is OK to allocate these 4077 // statically. 4078 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 4079 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 4080 set_cm_oop_closure(&cm_oop_closure); 4081 4082 if (_cm->has_overflown()) { 4083 // This can happen if the mark stack overflows during a GC pause 4084 // and this task, after a yield point, restarts. We have to abort 4085 // as we need to get into the overflow protocol which happens 4086 // right at the end of this task. 4087 set_has_aborted(); 4088 } 4089 4090 // First drain any available SATB buffers. After this, we will not 4091 // look at SATB buffers before the next invocation of this method. 4092 // If enough completed SATB buffers are queued up, the regular clock 4093 // will abort this task so that it restarts. 4094 drain_satb_buffers(); 4095 // ...then partially drain the local queue and the global stack 4096 drain_local_queue(true); 4097 drain_global_stack(true); 4098 4099 do { 4100 if (!has_aborted() && _curr_region != NULL) { 4101 // This means that we're already holding on to a region. 4102 assert(_finger != NULL, "if region is not NULL, then the finger " 4103 "should not be NULL either"); 4104 4105 // We might have restarted this task after an evacuation pause 4106 // which might have evacuated the region we're holding on to 4107 // underneath our feet. Let's read its limit again to make sure 4108 // that we do not iterate over a region of the heap that 4109 // contains garbage (update_region_limit() will also move 4110 // _finger to the start of the region if it is found empty). 4111 update_region_limit(); 4112 // We will start from _finger not from the start of the region, 4113 // as we might be restarting this task after aborting half-way 4114 // through scanning this region. In this case, _finger points to 4115 // the address where we last found a marked object. If this is a 4116 // fresh region, _finger points to start(). 4117 MemRegion mr = MemRegion(_finger, _region_limit); 4118 4119 if (_cm->verbose_low()) { 4120 gclog_or_tty->print_cr("[%u] we're scanning part " 4121 "["PTR_FORMAT", "PTR_FORMAT") " 4122 "of region "HR_FORMAT, 4123 _worker_id, _finger, _region_limit, 4124 HR_FORMAT_PARAMS(_curr_region)); 4125 } 4126 4127 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4128 "humongous regions should go around loop once only"); 4129 4130 // Some special cases: 4131 // If the memory region is empty, we can just give up the region. 
4132 // If the current region is humongous then we only need to check 4133 // the bitmap for the bit associated with the start of the object, 4134 // scan the object if it's live, and give up the region. 4135 // Otherwise, let's iterate over the bitmap of the part of the region 4136 // that is left. 4137 // If the iteration is successful, give up the region. 4138 if (mr.is_empty()) { 4139 giveup_current_region(); 4140 regular_clock_call(); 4141 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4142 if (_nextMarkBitMap->isMarked(mr.start())) { 4143 // The object is marked - apply the closure 4144 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4145 bitmap_closure.do_bit(offset); 4146 } 4147 // Even if this task aborted while scanning the humongous object 4148 // we can (and should) give up the current region. 4149 giveup_current_region(); 4150 regular_clock_call(); 4151 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4152 giveup_current_region(); 4153 regular_clock_call(); 4154 } else { 4155 assert(has_aborted(), "currently the only way to do so"); 4156 // The only way to abort the bitmap iteration is to return 4157 // false from the do_bit() method. However, inside the 4158 // do_bit() method we move the _finger to point to the 4159 // object currently being looked at. So, if we bail out, we 4160 // have definitely set _finger to something non-null. 4161 assert(_finger != NULL, "invariant"); 4162 4163 // Region iteration was actually aborted. So now _finger 4164 // points to the address of the object we last scanned. If we 4165 // leave it there, when we restart this task, we will rescan 4166 // the object. It is easy to avoid this. We move the finger by 4167 // enough to point to the next possible object header (the 4168 // bitmap knows by how much we need to move it as it knows its 4169 // granularity). 4170 assert(_finger < _region_limit, "invariant"); 4171 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4172 // Check if bitmap iteration was aborted while scanning the last object 4173 if (new_finger >= _region_limit) { 4174 giveup_current_region(); 4175 } else { 4176 move_finger_to(new_finger); 4177 } 4178 } 4179 } 4180 // At this point we have either completed iterating over the 4181 // region we were holding on to, or we have aborted. 4182 4183 // We then partially drain the local queue and the global stack. 4184 // (Do we really need this?) 4185 drain_local_queue(true); 4186 drain_global_stack(true); 4187 4188 // Read the note on the claim_region() method on why it might 4189 // return NULL with potentially more regions available for 4190 // claiming and why we have to check out_of_regions() to determine 4191 // whether we're done or not. 4192 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4193 // We are going to try to claim a new region. We should have 4194 // given up on the previous one. 4195 // Separated the asserts so that we know which one fires. 
4196 assert(_curr_region == NULL, "invariant"); 4197 assert(_finger == NULL, "invariant"); 4198 assert(_region_limit == NULL, "invariant"); 4199 if (_cm->verbose_low()) { 4200 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4201 } 4202 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4203 if (claimed_region != NULL) { 4204 // Yes, we managed to claim one 4205 statsOnly( ++_regions_claimed ); 4206 4207 if (_cm->verbose_low()) { 4208 gclog_or_tty->print_cr("[%u] we successfully claimed " 4209 "region "PTR_FORMAT, 4210 _worker_id, claimed_region); 4211 } 4212 4213 setup_for_region(claimed_region); 4214 assert(_curr_region == claimed_region, "invariant"); 4215 } 4216 // It is important to call the regular clock here. It might take 4217 // a while to claim a region if, for example, we hit a large 4218 // block of empty regions. So we need to call the regular clock 4219 // method once round the loop to make sure it's called 4220 // frequently enough. 4221 regular_clock_call(); 4222 } 4223 4224 if (!has_aborted() && _curr_region == NULL) { 4225 assert(_cm->out_of_regions(), 4226 "at this point we should be out of regions"); 4227 } 4228 } while ( _curr_region != NULL && !has_aborted()); 4229 4230 if (!has_aborted()) { 4231 // We cannot check whether the global stack is empty, since other 4232 // tasks might be pushing objects to it concurrently. 4233 assert(_cm->out_of_regions(), 4234 "at this point we should be out of regions"); 4235 4236 if (_cm->verbose_low()) { 4237 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4238 } 4239 4240 // Try to reduce the number of available SATB buffers so that 4241 // remark has less work to do. 4242 drain_satb_buffers(); 4243 } 4244 4245 // Since we've done everything else, we can now totally drain the 4246 // local queue and global stack. 4247 drain_local_queue(false); 4248 drain_global_stack(false); 4249 4250 // Attempt at work stealing from other task's queues. 4251 if (do_stealing && !has_aborted()) { 4252 // We have not aborted. This means that we have finished all that 4253 // we could. Let's try to do some stealing... 4254 4255 // We cannot check whether the global stack is empty, since other 4256 // tasks might be pushing objects to it concurrently. 4257 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4258 "only way to reach here"); 4259 4260 if (_cm->verbose_low()) { 4261 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4262 } 4263 4264 while (!has_aborted()) { 4265 oop obj; 4266 statsOnly( ++_steal_attempts ); 4267 4268 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4269 if (_cm->verbose_medium()) { 4270 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4271 _worker_id, (void*) obj); 4272 } 4273 4274 statsOnly( ++_steals ); 4275 4276 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4277 "any stolen object should be marked"); 4278 scan_object(obj); 4279 4280 // And since we're towards the end, let's totally drain the 4281 // local queue and global stack. 4282 drain_local_queue(false); 4283 drain_global_stack(false); 4284 } else { 4285 break; 4286 } 4287 } 4288 } 4289 4290 // If we are about to wrap up and go into termination, check if we 4291 // should raise the overflow flag. 4292 if (do_termination && !has_aborted()) { 4293 if (_cm->force_overflow()->should_force()) { 4294 _cm->set_has_overflown(); 4295 regular_clock_call(); 4296 } 4297 } 4298 4299 // We still haven't aborted. Now, let's try to get into the 4300 // termination protocol. 
4301 if (do_termination && !has_aborted()) { 4302 // We cannot check whether the global stack is empty, since other 4303 // tasks might be concurrently pushing objects on it. 4304 // Separated the asserts so that we know which one fires. 4305 assert(_cm->out_of_regions(), "only way to reach here"); 4306 assert(_task_queue->size() == 0, "only way to reach here"); 4307 4308 if (_cm->verbose_low()) { 4309 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4310 } 4311 4312 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4313 4314 // The CMTask class also extends the TerminatorTerminator class, 4315 // hence its should_exit_termination() method will also decide 4316 // whether to exit the termination protocol or not. 4317 bool finished = (is_serial ? true 4318 :_cm->terminator()->offer_termination(this)); 4319 4320 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4321 _termination_time_ms += 4322 termination_end_time_ms - _termination_start_time_ms; 4323 4324 if (finished) { 4325 // We're all done. 4326 4327 if (_worker_id == 0) { 4328 // let's allow task 0 to do this 4329 if (concurrent()) { 4330 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4331 // we need to set this to false before the next 4332 // safepoint. This way we ensure that the marking phase 4333 // doesn't observe any more heap expansions. 4334 _cm->clear_concurrent_marking_in_progress(); 4335 } 4336 } 4337 4338 // We can now guarantee that the global stack is empty, since 4339 // all other tasks have finished. We separated the guarantees so 4340 // that, if a condition is false, we can immediately find out 4341 // which one. 4342 guarantee(_cm->out_of_regions(), "only way to reach here"); 4343 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4344 guarantee(_task_queue->size() == 0, "only way to reach here"); 4345 guarantee(!_cm->has_overflown(), "only way to reach here"); 4346 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4347 4348 if (_cm->verbose_low()) { 4349 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4350 } 4351 } else { 4352 // Apparently there's more work to do. Let's abort this task. It 4353 // will restart it and we can hopefully find more things to do. 4354 4355 if (_cm->verbose_low()) { 4356 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4357 _worker_id); 4358 } 4359 4360 set_has_aborted(); 4361 statsOnly( ++_aborted_termination ); 4362 } 4363 } 4364 4365 // Mainly for debugging purposes to make sure that a pointer to the 4366 // closure which was statically allocated in this frame doesn't 4367 // escape it by accident. 4368 set_cm_oop_closure(NULL); 4369 double end_time_ms = os::elapsedVTime() * 1000.0; 4370 double elapsed_time_ms = end_time_ms - _start_time_ms; 4371 // Update the step history. 4372 _step_times_ms.add(elapsed_time_ms); 4373 4374 if (has_aborted()) { 4375 // The task was aborted for some reason. 4376 4377 statsOnly( ++_aborted ); 4378 4379 if (_has_timed_out) { 4380 double diff_ms = elapsed_time_ms - _time_target_ms; 4381 // Keep statistics of how well we did with respect to hitting 4382 // our target only if we actually timed out (if we aborted for 4383 // other reasons, then the results might get skewed). 4384 _marking_step_diffs_ms.add(diff_ms); 4385 } 4386 4387 if (_cm->has_overflown()) { 4388 // This is the interesting one. We aborted because a global 4389 // overflow was raised. 
This means we have to restart the 4390 // marking phase and start iterating over regions. However, in 4391 // order to do this we have to make sure that all tasks stop 4392 // what they are doing and re-initialise in a safe manner. We 4393 // will achieve this with the use of two barrier sync points. 4394 4395 if (_cm->verbose_low()) { 4396 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4397 } 4398 4399 if (!is_serial) { 4400 // We only need to enter the sync barrier if being called 4401 // from a parallel context 4402 _cm->enter_first_sync_barrier(_worker_id); 4403 4404 // When we exit this sync barrier we know that all tasks have 4405 // stopped doing marking work. So, it's now safe to 4406 // re-initialise our data structures. At the end of this method, 4407 // task 0 will clear the global data structures. 4408 } 4409 4410 statsOnly( ++_aborted_overflow ); 4411 4412 // We clear the local state of this task... 4413 clear_region_fields(); 4414 4415 if (!is_serial) { 4416 // ...and enter the second barrier. 4417 _cm->enter_second_sync_barrier(_worker_id); 4418 } 4419 // At this point, if we're during the concurrent phase of 4420 // marking, everything has been re-initialised and we're 4421 // ready to restart. 4422 } 4423 4424 if (_cm->verbose_low()) { 4425 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4426 "elapsed = %1.2lfms <<<<<<<<<<", 4427 _worker_id, _time_target_ms, elapsed_time_ms); 4428 if (_cm->has_aborted()) { 4429 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4430 _worker_id); 4431 } 4432 } 4433 } else { 4434 if (_cm->verbose_low()) { 4435 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4436 "elapsed = %1.2lfms <<<<<<<<<<", 4437 _worker_id, _time_target_ms, elapsed_time_ms); 4438 } 4439 } 4440 4441 _claimed = false; 4442 } 4443 4444 CMTask::CMTask(uint worker_id, 4445 ConcurrentMark* cm, 4446 size_t* marked_bytes, 4447 BitMap* card_bm, 4448 CMTaskQueue* task_queue, 4449 CMTaskQueueSet* task_queues) 4450 : _g1h(G1CollectedHeap::heap()), 4451 _worker_id(worker_id), _cm(cm), 4452 _claimed(false), 4453 _nextMarkBitMap(NULL), _hash_seed(17), 4454 _task_queue(task_queue), 4455 _task_queues(task_queues), 4456 _cm_oop_closure(NULL), 4457 _marked_bytes_array(marked_bytes), 4458 _card_bm(card_bm) { 4459 guarantee(task_queue != NULL, "invariant"); 4460 guarantee(task_queues != NULL, "invariant"); 4461 4462 statsOnly( _clock_due_to_scanning = 0; 4463 _clock_due_to_marking = 0 ); 4464 4465 _marking_step_diffs_ms.add(0.5); 4466 } 4467 4468 // These are formatting macros that are used below to ensure 4469 // consistent formatting. The *_H_* versions are used to format the 4470 // header for a particular value and they should be kept consistent 4471 // with the corresponding macro. Also note that most of the macros add 4472 // the necessary white space (as a prefix) which makes them a bit 4473 // easier to compose. 4474 4475 // All the output lines are prefixed with this string to be able to 4476 // identify them easily in a large log file. 
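// With the formats below, a per-region line comes out roughly as
// follows (field widths and values illustrative only):
//
//   ### OLD 0x00000000f0000000-0x00000000f0100000    1048576     838592     712304           93.5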
4477 #define G1PPRL_LINE_PREFIX "###"
4478 
4479 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4480 #ifdef _LP64
4481 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4482 #else // _LP64
4483 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4484 #endif // _LP64
4485 
4486 // For per-region info
4487 #define G1PPRL_TYPE_FORMAT " %-4s"
4488 #define G1PPRL_TYPE_H_FORMAT " %4s"
4489 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4490 #define G1PPRL_BYTE_H_FORMAT " %9s"
4491 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4492 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4493 
4494 // For summary info
4495 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4496 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4497 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4498 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4499 
4500 G1PrintRegionLivenessInfoClosure::
4501 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4502   : _out(out),
4503     _total_used_bytes(0), _total_capacity_bytes(0),
4504     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4505     _hum_used_bytes(0), _hum_capacity_bytes(0),
4506     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4507   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4508   MemRegion g1_committed = g1h->g1_committed();
4509   MemRegion g1_reserved = g1h->g1_reserved();
4510   double now = os::elapsedTime();
4511 
4512   // Print the header of the output.
4513   _out->cr();
4514   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4515   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4516                  G1PPRL_SUM_ADDR_FORMAT("committed")
4517                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4518                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4519                  g1_committed.start(), g1_committed.end(),
4520                  g1_reserved.start(), g1_reserved.end(),
4521                  HeapRegion::GrainBytes);
4522   _out->print_cr(G1PPRL_LINE_PREFIX);
4523   _out->print_cr(G1PPRL_LINE_PREFIX
4524                  G1PPRL_TYPE_H_FORMAT
4525                  G1PPRL_ADDR_BASE_H_FORMAT
4526                  G1PPRL_BYTE_H_FORMAT
4527                  G1PPRL_BYTE_H_FORMAT
4528                  G1PPRL_BYTE_H_FORMAT
4529                  G1PPRL_DOUBLE_H_FORMAT,
4530                  "type", "address-range",
4531                  "used", "prev-live", "next-live", "gc-eff");
4532   _out->print_cr(G1PPRL_LINE_PREFIX
4533                  G1PPRL_TYPE_H_FORMAT
4534                  G1PPRL_ADDR_BASE_H_FORMAT
4535                  G1PPRL_BYTE_H_FORMAT
4536                  G1PPRL_BYTE_H_FORMAT
4537                  G1PPRL_BYTE_H_FORMAT
4538                  G1PPRL_DOUBLE_H_FORMAT,
4539                  "", "",
4540                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4541 }
4542 
4543 // It takes as a parameter a reference to one of the _hum_* fields; it
4544 // deduces the corresponding value for a region in a humongous region
4545 // series (either the region size, or what's left if the _hum_* field
4546 // is < the region size), and updates the _hum_* field accordingly.
4547 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4548   size_t bytes = 0;
4549   // The > 0 check is to deal with the prev and next live bytes which
4550   // could be 0.
4551   if (*hum_bytes > 0) {
4552     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4553     *hum_bytes -= bytes;
4554   }
4555   return bytes;
4556 }
4557 
4558 // It deduces the values for a region in a humongous region series
4559 // from the _hum_* fields and updates those accordingly. It assumes
4560 // that the _hum_* fields have already been set up from the "starts
4561 // humongous" region and that we visit the regions in address order.
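// For instance (illustrative): if a humongous series was set up with
// _hum_used_bytes == 2.5 * HeapRegion::GrainBytes, three successive
// calls through here hand back GrainBytes, GrainBytes and
// 0.5 * GrainBytes respectively, leaving the field at zero.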
4562 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4563 size_t* capacity_bytes, 4564 size_t* prev_live_bytes, 4565 size_t* next_live_bytes) { 4566 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4567 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4568 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4569 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4570 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4571 } 4572 4573 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4574 const char* type = ""; 4575 HeapWord* bottom = r->bottom(); 4576 HeapWord* end = r->end(); 4577 size_t capacity_bytes = r->capacity(); 4578 size_t used_bytes = r->used(); 4579 size_t prev_live_bytes = r->live_bytes(); 4580 size_t next_live_bytes = r->next_live_bytes(); 4581 double gc_eff = r->gc_efficiency(); 4582 if (r->used() == 0) { 4583 type = "FREE"; 4584 } else if (r->is_survivor()) { 4585 type = "SURV"; 4586 } else if (r->is_young()) { 4587 type = "EDEN"; 4588 } else if (r->startsHumongous()) { 4589 type = "HUMS"; 4590 4591 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4592 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4593 "they should have been zeroed after the last time we used them"); 4594 // Set up the _hum_* fields. 4595 _hum_capacity_bytes = capacity_bytes; 4596 _hum_used_bytes = used_bytes; 4597 _hum_prev_live_bytes = prev_live_bytes; 4598 _hum_next_live_bytes = next_live_bytes; 4599 get_hum_bytes(&used_bytes, &capacity_bytes, 4600 &prev_live_bytes, &next_live_bytes); 4601 end = bottom + HeapRegion::GrainWords; 4602 } else if (r->continuesHumongous()) { 4603 type = "HUMC"; 4604 get_hum_bytes(&used_bytes, &capacity_bytes, 4605 &prev_live_bytes, &next_live_bytes); 4606 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4607 } else { 4608 type = "OLD"; 4609 } 4610 4611 _total_used_bytes += used_bytes; 4612 _total_capacity_bytes += capacity_bytes; 4613 _total_prev_live_bytes += prev_live_bytes; 4614 _total_next_live_bytes += next_live_bytes; 4615 4616 // Print a line for this particular region. 4617 _out->print_cr(G1PPRL_LINE_PREFIX 4618 G1PPRL_TYPE_FORMAT 4619 G1PPRL_ADDR_BASE_FORMAT 4620 G1PPRL_BYTE_FORMAT 4621 G1PPRL_BYTE_FORMAT 4622 G1PPRL_BYTE_FORMAT 4623 G1PPRL_DOUBLE_FORMAT, 4624 type, bottom, end, 4625 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4626 4627 return false; 4628 } 4629 4630 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4631 // Print the footer of the output. 4632 _out->print_cr(G1PPRL_LINE_PREFIX); 4633 _out->print_cr(G1PPRL_LINE_PREFIX 4634 " SUMMARY" 4635 G1PPRL_SUM_MB_FORMAT("capacity") 4636 G1PPRL_SUM_MB_PERC_FORMAT("used") 4637 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4638 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4639 bytes_to_mb(_total_capacity_bytes), 4640 bytes_to_mb(_total_used_bytes), 4641 perc(_total_used_bytes, _total_capacity_bytes), 4642 bytes_to_mb(_total_prev_live_bytes), 4643 perc(_total_prev_live_bytes, _total_capacity_bytes), 4644 bytes_to_mb(_total_next_live_bytes), 4645 perc(_total_next_live_bytes, _total_capacity_bytes)); 4646 _out->cr(); 4647 }
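// Putting the pieces together, one of these closures emits a report of
// roughly the following shape (values illustrative only):
//
//   ### PHASE Post-Marking @ 12.345
//   ### HEAP  committed: 0x...-0x...  reserved: 0x...-0x...  region-size: 1048576
//   ###
//   ###   type         address-range       used  prev-live  next-live       gc-eff
//   ###                                  (bytes)    (bytes)    (bytes)   (bytes/ms)
//   ###   ... one line per region, as printed by doHeapRegion() ...
//   ###
//   ### SUMMARY capacity: 256.00 MB used: 120.00 MB / 46.88 % prev-live: ...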