1 /* 2 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/symbolTable.hpp" 27 #include "gc_implementation/g1/concurrentMark.inline.hpp" 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp" 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp" 32 #include "gc_implementation/g1/g1Log.hpp" 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp" 34 #include "gc_implementation/g1/g1RemSet.hpp" 35 #include "gc_implementation/g1/heapRegion.inline.hpp" 36 #include "gc_implementation/g1/heapRegionRemSet.hpp" 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp" 38 #include "gc_implementation/shared/vmGCOperations.hpp" 39 #include "memory/genOopClosures.inline.hpp" 40 #include "memory/referencePolicy.hpp" 41 #include "memory/resourceArea.hpp" 42 #include "oops/oop.inline.hpp" 43 #include "runtime/handles.inline.hpp" 44 #include "runtime/java.hpp" 45 #include "services/memTracker.hpp" 46 47 // Concurrent marking bit map wrapper 48 49 CMBitMapRO::CMBitMapRO(int shifter) : 50 _bm(), 51 _shifter(shifter) { 52 _bmStartWord = 0; 53 _bmWordSize = 0; 54 } 55 56 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, 57 HeapWord* limit) const { 58 // First we must round addr *up* to a possible object boundary. 
59 addr = (HeapWord*)align_size_up((intptr_t)addr, 60 HeapWordSize << _shifter); 61 size_t addrOffset = heapWordToOffset(addr); 62 if (limit == NULL) { 63 limit = _bmStartWord + _bmWordSize; 64 } 65 size_t limitOffset = heapWordToOffset(limit); 66 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 67 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 68 assert(nextAddr >= addr, "get_next_one postcondition"); 69 assert(nextAddr == limit || isMarked(nextAddr), 70 "get_next_one postcondition"); 71 return nextAddr; 72 } 73 74 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, 75 HeapWord* limit) const { 76 size_t addrOffset = heapWordToOffset(addr); 77 if (limit == NULL) { 78 limit = _bmStartWord + _bmWordSize; 79 } 80 size_t limitOffset = heapWordToOffset(limit); 81 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); 82 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 83 assert(nextAddr >= addr, "get_next_one postcondition"); 84 assert(nextAddr == limit || !isMarked(nextAddr), 85 "get_next_one postcondition"); 86 return nextAddr; 87 } 88 89 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { 90 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); 91 return (int) (diff >> _shifter); 92 } 93 94 #ifndef PRODUCT 95 bool CMBitMapRO::covers(ReservedSpace heap_rs) const { 96 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 97 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize, 98 "size inconsistency"); 99 return _bmStartWord == (HeapWord*)(heap_rs.base()) && 100 _bmWordSize == heap_rs.size()>>LogHeapWordSize; 101 } 102 #endif 103 104 bool CMBitMap::allocate(ReservedSpace heap_rs) { 105 _bmStartWord = (HeapWord*)(heap_rs.base()); 106 _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes 107 ReservedSpace brs(ReservedSpace::allocation_align_size_up( 108 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); 109 if (!brs.is_reserved()) { 110 warning("ConcurrentMark marking bit map allocation failure"); 111 return false; 112 } 113 MemTracker::record_virtual_memory_type((address)brs.base(), mtGC); 114 // For now we'll just commit all of the bit map up front. 115 // Later on we'll try to be more parsimonious with swap. 
116 if (!_virtual_space.initialize(brs, brs.size())) { 117 warning("ConcurrentMark marking bit map backing store failure"); 118 return false; 119 } 120 assert(_virtual_space.committed_size() == brs.size(), 121 "didn't reserve backing store for all of concurrent marking bit map?"); 122 _bm.set_map((uintptr_t*)_virtual_space.low()); 123 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= 124 _bmWordSize, "inconsistency in bit map sizing"); 125 _bm.set_size(_bmWordSize >> _shifter); 126 return true; 127 } 128 129 void CMBitMap::clearAll() { 130 _bm.clear(); 131 return; 132 } 133 134 void CMBitMap::markRange(MemRegion mr) { 135 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 136 assert(!mr.is_empty(), "unexpected empty region"); 137 assert((offsetToHeapWord(heapWordToOffset(mr.end())) == 138 ((HeapWord *) mr.end())), 139 "markRange memory region end is not card aligned"); 140 // convert address range into offset range 141 _bm.at_put_range(heapWordToOffset(mr.start()), 142 heapWordToOffset(mr.end()), true); 143 } 144 145 void CMBitMap::clearRange(MemRegion mr) { 146 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 147 assert(!mr.is_empty(), "unexpected empty region"); 148 // convert address range into offset range 149 _bm.at_put_range(heapWordToOffset(mr.start()), 150 heapWordToOffset(mr.end()), false); 151 } 152 153 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, 154 HeapWord* end_addr) { 155 HeapWord* start = getNextMarkedWordAddress(addr); 156 start = MIN2(start, end_addr); 157 HeapWord* end = getNextUnmarkedWordAddress(start); 158 end = MIN2(end, end_addr); 159 assert(start <= end, "Consistency check"); 160 MemRegion mr(start, end); 161 if (!mr.is_empty()) { 162 clearRange(mr); 163 } 164 return mr; 165 } 166 167 CMMarkStack::CMMarkStack(ConcurrentMark* cm) : 168 _base(NULL), _cm(cm) 169 #ifdef ASSERT 170 , _drain_in_progress(false) 171 , _drain_in_progress_yields(false) 172 #endif 173 {} 174 175 bool CMMarkStack::allocate(size_t capacity) { 176 // allocate a stack of the requisite depth 177 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop))); 178 if (!rs.is_reserved()) { 179 warning("ConcurrentMark MarkStack allocation failure"); 180 return false; 181 } 182 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC); 183 if (!_virtual_space.initialize(rs, rs.size())) { 184 warning("ConcurrentMark MarkStack backing store failure"); 185 // Release the virtual memory reserved for the marking stack 186 rs.release(); 187 return false; 188 } 189 assert(_virtual_space.committed_size() == rs.size(), 190 "Didn't reserve backing store for all of ConcurrentMark stack?"); 191 _base = (oop*) _virtual_space.low(); 192 setEmpty(); 193 _capacity = (jint) capacity; 194 _saved_index = -1; 195 _should_expand = false; 196 NOT_PRODUCT(_max_depth = 0); 197 return true; 198 } 199 200 void CMMarkStack::expand() { 201 // Called, during remark, if we've overflown the marking stack during marking. 
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
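  // Added descriptive note: the plain (non-CAS) update of _index below is safe
  // because par_push_arr() runs under ParGCRareEvent_lock, unlike par_push()
  // and par_adjoin_arr() above, which claim their slots with Atomic::cmpxchg.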
309 _index = next_index; 310 for (int i = 0; i < n; i++) { 311 int ind = start + i; 312 assert(ind < _capacity, "By overflow test above."); 313 _base[ind] = ptr_arr[i]; 314 } 315 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 316 } 317 318 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { 319 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 320 jint index = _index; 321 if (index == 0) { 322 *n = 0; 323 return false; 324 } else { 325 int k = MIN2(max, index); 326 jint new_ind = index - k; 327 for (int j = 0; j < k; j++) { 328 ptr_arr[j] = _base[new_ind + j]; 329 } 330 _index = new_ind; 331 *n = k; 332 return true; 333 } 334 } 335 336 template<class OopClosureClass> 337 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) { 338 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after 339 || SafepointSynchronize::is_at_safepoint(), 340 "Drain recursion must be yield-safe."); 341 bool res = true; 342 debug_only(_drain_in_progress = true); 343 debug_only(_drain_in_progress_yields = yield_after); 344 while (!isEmpty()) { 345 oop newOop = pop(); 346 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop"); 347 assert(newOop->is_oop(), "Expected an oop"); 348 assert(bm == NULL || bm->isMarked((HeapWord*)newOop), 349 "only grey objects on this stack"); 350 newOop->oop_iterate(cl); 351 if (yield_after && _cm->do_yield_check()) { 352 res = false; 353 break; 354 } 355 } 356 debug_only(_drain_in_progress = false); 357 return res; 358 } 359 360 void CMMarkStack::note_start_of_gc() { 361 assert(_saved_index == -1, 362 "note_start_of_gc()/end_of_gc() bracketed incorrectly"); 363 _saved_index = _index; 364 } 365 366 void CMMarkStack::note_end_of_gc() { 367 // This is intentionally a guarantee, instead of an assert. If we 368 // accidentally add something to the mark stack during GC, it 369 // will be a correctness issue so it's better if we crash. we'll 370 // only check this once per GC anyway, so it won't be a performance 371 // issue in any way. 372 guarantee(_saved_index == _index, 373 err_msg("saved index: %d index: %d", _saved_index, _index)); 374 _saved_index = -1; 375 } 376 377 void CMMarkStack::oops_do(OopClosure* f) { 378 assert(_saved_index == _index, 379 err_msg("saved index: %d index: %d", _saved_index, _index)); 380 for (int i = 0; i < _index; i += 1) { 381 f->do_oop(&_base[i]); 382 } 383 } 384 385 bool ConcurrentMark::not_yet_marked(oop obj) const { 386 return _g1h->is_obj_ill(obj); 387 } 388 389 CMRootRegions::CMRootRegions() : 390 _young_list(NULL), _cm(NULL), _scan_in_progress(false), 391 _should_abort(false), _next_survivor(NULL) { } 392 393 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) { 394 _young_list = g1h->young_list(); 395 _cm = cm; 396 } 397 398 void CMRootRegions::prepare_for_scan() { 399 assert(!scan_in_progress(), "pre-condition"); 400 401 // Currently, only survivors can be root regions. 402 assert(_next_survivor == NULL, "pre-condition"); 403 _next_survivor = _young_list->first_survivor_region(); 404 _scan_in_progress = (_next_survivor != NULL); 405 _should_abort = false; 406 } 407 408 HeapRegion* CMRootRegions::claim_next() { 409 if (_should_abort) { 410 // If someone has set the should_abort flag, we return NULL to 411 // force the caller to bail out of their loop. 412 return NULL; 413 } 414 415 // Currently, only survivors can be root regions. 
416 HeapRegion* res = _next_survivor; 417 if (res != NULL) { 418 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 419 // Read it again in case it changed while we were waiting for the lock. 420 res = _next_survivor; 421 if (res != NULL) { 422 if (res == _young_list->last_survivor_region()) { 423 // We just claimed the last survivor so store NULL to indicate 424 // that we're done. 425 _next_survivor = NULL; 426 } else { 427 _next_survivor = res->get_next_young_region(); 428 } 429 } else { 430 // Someone else claimed the last survivor while we were trying 431 // to take the lock so nothing else to do. 432 } 433 } 434 assert(res == NULL || res->is_survivor(), "post-condition"); 435 436 return res; 437 } 438 439 void CMRootRegions::scan_finished() { 440 assert(scan_in_progress(), "pre-condition"); 441 442 // Currently, only survivors can be root regions. 443 if (!_should_abort) { 444 assert(_next_survivor == NULL, "we should have claimed all survivors"); 445 } 446 _next_survivor = NULL; 447 448 { 449 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 450 _scan_in_progress = false; 451 RootRegionScan_lock->notify_all(); 452 } 453 } 454 455 bool CMRootRegions::wait_until_scan_finished() { 456 if (!scan_in_progress()) return false; 457 458 { 459 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 460 while (scan_in_progress()) { 461 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 462 } 463 } 464 return true; 465 } 466 467 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away 468 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list 469 #endif // _MSC_VER 470 471 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 472 return MAX2((n_par_threads + 2) / 4, 1U); 473 } 474 475 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) : 476 _g1h(g1h), 477 _markBitMap1(MinObjAlignment - 1), 478 _markBitMap2(MinObjAlignment - 1), 479 480 _parallel_marking_threads(0), 481 _max_parallel_marking_threads(0), 482 _sleep_factor(0.0), 483 _marking_task_overhead(1.0), 484 _cleanup_sleep_factor(0.0), 485 _cleanup_task_overhead(1.0), 486 _cleanup_list("Cleanup List"), 487 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/), 488 _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >> 489 CardTableModRefBS::card_shift, 490 false /* in_resource_area*/), 491 492 _prevMarkBitMap(&_markBitMap1), 493 _nextMarkBitMap(&_markBitMap2), 494 495 _markStack(this), 496 // _finger set in set_non_marking_state 497 498 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)), 499 // _active_tasks set in set_non_marking_state 500 // _tasks set inside the constructor 501 _task_queues(new CMTaskQueueSet((int) _max_worker_id)), 502 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)), 503 504 _has_overflown(false), 505 _concurrent(false), 506 _has_aborted(false), 507 _restart_for_overflow(false), 508 _concurrent_marking_in_progress(false), 509 510 // _verbose_level set below 511 512 _init_times(), 513 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 514 _cleanup_times(), 515 _total_counting_time(0.0), 516 _total_rs_scrub_time(0.0), 517 518 _parallel_workers(NULL), 519 520 _count_card_bitmaps(NULL), 521 _count_marked_bytes(NULL), 522 _completed_initialization(false) { 523 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel; 524 if (verbose_level < no_verbose) { 525 verbose_level = no_verbose; 526 } 527 if 
(verbose_level > high_verbose) { 528 verbose_level = high_verbose; 529 } 530 _verbose_level = verbose_level; 531 532 if (verbose_low()) { 533 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " 534 "heap end = "PTR_FORMAT, _heap_start, _heap_end); 535 } 536 537 if (!_markBitMap1.allocate(heap_rs)) { 538 warning("Failed to allocate first CM bit map"); 539 return; 540 } 541 if (!_markBitMap2.allocate(heap_rs)) { 542 warning("Failed to allocate second CM bit map"); 543 return; 544 } 545 546 // Create & start a ConcurrentMark thread. 547 _cmThread = new ConcurrentMarkThread(this); 548 assert(cmThread() != NULL, "CM Thread should have been created"); 549 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 550 551 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 552 assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency"); 553 assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency"); 554 555 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 556 satb_qs.set_buffer_size(G1SATBBufferSize); 557 558 _root_regions.init(_g1h, this); 559 560 if (ConcGCThreads > ParallelGCThreads) { 561 warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") " 562 "than ParallelGCThreads (" UINT32_FORMAT ").", 563 ConcGCThreads, ParallelGCThreads); 564 return; 565 } 566 if (ParallelGCThreads == 0) { 567 // if we are not running with any parallel GC threads we will not 568 // spawn any marking threads either 569 _parallel_marking_threads = 0; 570 _max_parallel_marking_threads = 0; 571 _sleep_factor = 0.0; 572 _marking_task_overhead = 1.0; 573 } else { 574 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) { 575 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent 576 // if both are set 577 _sleep_factor = 0.0; 578 _marking_task_overhead = 1.0; 579 } else if (G1MarkingOverheadPercent > 0) { 580 // We will calculate the number of parallel marking threads based 581 // on a target overhead with respect to the soft real-time goal 582 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 583 double overall_cm_overhead = 584 (double) MaxGCPauseMillis * marking_overhead / 585 (double) GCPauseIntervalMillis; 586 double cpu_ratio = 1.0 / (double) os::processor_count(); 587 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 588 double marking_task_overhead = 589 overall_cm_overhead / marking_thread_num * 590 (double) os::processor_count(); 591 double sleep_factor = 592 (1.0 - marking_task_overhead) / marking_task_overhead; 593 594 FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num); 595 _sleep_factor = sleep_factor; 596 _marking_task_overhead = marking_task_overhead; 597 } else { 598 // Calculate the number of parallel marking threads by scaling 599 // the number of parallel GC threads. 
600 uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads); 601 FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num); 602 _sleep_factor = 0.0; 603 _marking_task_overhead = 1.0; 604 } 605 606 assert(ConcGCThreads > 0, "Should have been set"); 607 _parallel_marking_threads = (uint) ConcGCThreads; 608 _max_parallel_marking_threads = _parallel_marking_threads; 609 610 if (parallel_marking_threads() > 1) { 611 _cleanup_task_overhead = 1.0; 612 } else { 613 _cleanup_task_overhead = marking_task_overhead(); 614 } 615 _cleanup_sleep_factor = 616 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); 617 618 #if 0 619 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); 620 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); 621 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); 622 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); 623 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); 624 #endif 625 626 guarantee(parallel_marking_threads() > 0, "peace of mind"); 627 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads", 628 _max_parallel_marking_threads, false, true); 629 if (_parallel_workers == NULL) { 630 vm_exit_during_initialization("Failed necessary allocation."); 631 } else { 632 _parallel_workers->initialize_workers(); 633 } 634 } 635 636 if (FLAG_IS_DEFAULT(MarkStackSize)) { 637 uintx mark_stack_size = 638 MIN2(MarkStackSizeMax, 639 MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE))); 640 // Verify that the calculated value for MarkStackSize is in range. 641 // It would be nice to use the private utility routine from Arguments. 642 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 643 warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): " 644 "must be between " UINTX_FORMAT " and " UINTX_FORMAT, 645 mark_stack_size, 1, MarkStackSizeMax); 646 return; 647 } 648 FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size); 649 } else { 650 // Verify MarkStackSize is in range. 
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions;
  // these will happen at the end of evacuation pauses, when tasks are
  // inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // We need this to make sure that the concurrent-marking-in-progress
  // flag is on during the evacuation pause with the initial mark
  // piggy-backed on it.
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur, next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count decreases at every
  // remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
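      // Added note: force_overflow() below uses the ForceOverflowSettings
      // mechanism defined later in this file; its update() call decrements the
      // remaining count of overflows to force, so a testing-induced overflow
      // (G1ConcMarkForceOverflow) is not forced indefinitely.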
997 reset_marking_state(true /* clear_overflow */); 998 force_overflow()->update(); 999 1000 if (G1Log::fine()) { 1001 gclog_or_tty->date_stamp(PrintGCDateStamps); 1002 gclog_or_tty->stamp(PrintGCTimeStamps); 1003 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); 1004 } 1005 } 1006 } 1007 1008 // after this, each task should reset its own data structures then 1009 // then go into the second barrier 1010 } 1011 1012 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) { 1013 if (verbose_low()) { 1014 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id); 1015 } 1016 1017 if (concurrent()) { 1018 ConcurrentGCThread::stsLeave(); 1019 } 1020 _second_overflow_barrier_sync.enter(); 1021 if (concurrent()) { 1022 ConcurrentGCThread::stsJoin(); 1023 } 1024 // at this point everything should be re-initialized and ready to go 1025 1026 if (verbose_low()) { 1027 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id); 1028 } 1029 } 1030 1031 #ifndef PRODUCT 1032 void ForceOverflowSettings::init() { 1033 _num_remaining = G1ConcMarkForceOverflow; 1034 _force = false; 1035 update(); 1036 } 1037 1038 void ForceOverflowSettings::update() { 1039 if (_num_remaining > 0) { 1040 _num_remaining -= 1; 1041 _force = true; 1042 } else { 1043 _force = false; 1044 } 1045 } 1046 1047 bool ForceOverflowSettings::should_force() { 1048 if (_force) { 1049 _force = false; 1050 return true; 1051 } else { 1052 return false; 1053 } 1054 } 1055 #endif // !PRODUCT 1056 1057 class CMConcurrentMarkingTask: public AbstractGangTask { 1058 private: 1059 ConcurrentMark* _cm; 1060 ConcurrentMarkThread* _cmt; 1061 1062 public: 1063 void work(uint worker_id) { 1064 assert(Thread::current()->is_ConcurrentGC_thread(), 1065 "this should only be done by a conc GC thread"); 1066 ResourceMark rm; 1067 1068 double start_vtime = os::elapsedVTime(); 1069 1070 ConcurrentGCThread::stsJoin(); 1071 1072 assert(worker_id < _cm->active_tasks(), "invariant"); 1073 CMTask* the_task = _cm->task(worker_id); 1074 the_task->record_start_time(); 1075 if (!_cm->has_aborted()) { 1076 do { 1077 double start_vtime_sec = os::elapsedVTime(); 1078 double start_time_sec = os::elapsedTime(); 1079 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1080 1081 the_task->do_marking_step(mark_step_duration_ms, 1082 true /* do_stealing */, 1083 true /* do_termination */, 1084 false /* is_serial*/); 1085 1086 double end_time_sec = os::elapsedTime(); 1087 double end_vtime_sec = os::elapsedVTime(); 1088 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 1089 double elapsed_time_sec = end_time_sec - start_time_sec; 1090 _cm->clear_has_overflown(); 1091 1092 bool ret = _cm->do_yield_check(worker_id); 1093 1094 jlong sleep_time_ms; 1095 if (!_cm->has_aborted() && the_task->has_aborted()) { 1096 sleep_time_ms = 1097 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 1098 ConcurrentGCThread::stsLeave(); 1099 os::sleep(Thread::current(), sleep_time_ms, false); 1100 ConcurrentGCThread::stsJoin(); 1101 } 1102 double end_time2_sec = os::elapsedTime(); 1103 double elapsed_time2_sec = end_time2_sec - start_time_sec; 1104 1105 #if 0 1106 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " 1107 "overhead %1.4lf", 1108 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, 1109 the_task->conc_overhead(os::elapsedTime()) * 8.0); 1110 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", 1111 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 1112 #endif 1113 } while (!_cm->has_aborted() 
&& the_task->has_aborted()); 1114 } 1115 the_task->record_end_time(); 1116 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 1117 1118 ConcurrentGCThread::stsLeave(); 1119 1120 double end_vtime = os::elapsedVTime(); 1121 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 1122 } 1123 1124 CMConcurrentMarkingTask(ConcurrentMark* cm, 1125 ConcurrentMarkThread* cmt) : 1126 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 1127 1128 ~CMConcurrentMarkingTask() { } 1129 }; 1130 1131 // Calculates the number of active workers for a concurrent 1132 // phase. 1133 uint ConcurrentMark::calc_parallel_marking_threads() { 1134 if (G1CollectedHeap::use_parallel_gc_threads()) { 1135 uint n_conc_workers = 0; 1136 if (!UseDynamicNumberOfGCThreads || 1137 (!FLAG_IS_DEFAULT(ConcGCThreads) && 1138 !ForceDynamicNumberOfGCThreads)) { 1139 n_conc_workers = max_parallel_marking_threads(); 1140 } else { 1141 n_conc_workers = 1142 AdaptiveSizePolicy::calc_default_active_workers( 1143 max_parallel_marking_threads(), 1144 1, /* Minimum workers */ 1145 parallel_marking_threads(), 1146 Threads::number_of_non_daemon_threads()); 1147 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 1148 // that scaling has already gone into "_max_parallel_marking_threads". 1149 } 1150 assert(n_conc_workers > 0, "Always need at least 1"); 1151 return n_conc_workers; 1152 } 1153 // If we are not running with any parallel GC threads we will not 1154 // have spawned any marking threads either. Hence the number of 1155 // concurrent workers should be 0. 1156 return 0; 1157 } 1158 1159 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) { 1160 // Currently, only survivors can be root regions. 1161 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 1162 G1RootRegionScanClosure cl(_g1h, this, worker_id); 1163 1164 const uintx interval = PrefetchScanIntervalInBytes; 1165 HeapWord* curr = hr->bottom(); 1166 const HeapWord* end = hr->top(); 1167 while (curr < end) { 1168 Prefetch::read(curr, interval); 1169 oop obj = oop(curr); 1170 int size = obj->oop_iterate(&cl); 1171 assert(size == obj->size(), "sanity"); 1172 curr += size; 1173 } 1174 } 1175 1176 class CMRootRegionScanTask : public AbstractGangTask { 1177 private: 1178 ConcurrentMark* _cm; 1179 1180 public: 1181 CMRootRegionScanTask(ConcurrentMark* cm) : 1182 AbstractGangTask("Root Region Scan"), _cm(cm) { } 1183 1184 void work(uint worker_id) { 1185 assert(Thread::current()->is_ConcurrentGC_thread(), 1186 "this should only be done by a conc GC thread"); 1187 1188 CMRootRegions* root_regions = _cm->root_regions(); 1189 HeapRegion* hr = root_regions->claim_next(); 1190 while (hr != NULL) { 1191 _cm->scanRootRegion(hr, worker_id); 1192 hr = root_regions->claim_next(); 1193 } 1194 } 1195 }; 1196 1197 void ConcurrentMark::scanRootRegions() { 1198 // scan_in_progress() will have been set to true only if there was 1199 // at least one root region to scan. So, if it's false, we 1200 // should not attempt to do any further work. 
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
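// (Added note: this closure is instantiated by VerifyLiveObjectDataHRClosure
// below, which uses it to recompute the expected region and card bitmaps that
// are then compared against the actual ones.)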
1383 class CalcLiveObjectsClosure: public CMCountDataClosureBase { 1384 CMBitMapRO* _bm; 1385 size_t _region_marked_bytes; 1386 1387 public: 1388 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h, 1389 BitMap* region_bm, BitMap* card_bm) : 1390 CMCountDataClosureBase(g1h, region_bm, card_bm), 1391 _bm(bm), _region_marked_bytes(0) { } 1392 1393 bool doHeapRegion(HeapRegion* hr) { 1394 1395 if (hr->continuesHumongous()) { 1396 // We will ignore these here and process them when their 1397 // associated "starts humongous" region is processed (see 1398 // set_bit_for_heap_region()). Note that we cannot rely on their 1399 // associated "starts humongous" region to have their bit set to 1400 // 1 since, due to the region chunking in the parallel region 1401 // iteration, a "continues humongous" region might be visited 1402 // before its associated "starts humongous". 1403 return false; 1404 } 1405 1406 HeapWord* ntams = hr->next_top_at_mark_start(); 1407 HeapWord* start = hr->bottom(); 1408 1409 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(), 1410 err_msg("Preconditions not met - " 1411 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT, 1412 start, ntams, hr->end())); 1413 1414 // Find the first marked object at or after "start". 1415 start = _bm->getNextMarkedWordAddress(start, ntams); 1416 1417 size_t marked_bytes = 0; 1418 1419 while (start < ntams) { 1420 oop obj = oop(start); 1421 int obj_sz = obj->size(); 1422 HeapWord* obj_end = start + obj_sz; 1423 1424 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 1425 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end); 1426 1427 // Note: if we're looking at the last region in heap - obj_end 1428 // could be actually just beyond the end of the heap; end_idx 1429 // will then correspond to a (non-existent) card that is also 1430 // just beyond the heap. 1431 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) { 1432 // end of object is not card aligned - increment to cover 1433 // all the cards spanned by the object 1434 end_idx += 1; 1435 } 1436 1437 // Set the bits in the card BM for the cards spanned by this object. 1438 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1439 1440 // Add the size of this object to the number of marked bytes. 1441 marked_bytes += (size_t)obj_sz * HeapWordSize; 1442 1443 // Find the next marked object after this one. 1444 start = _bm->getNextMarkedWordAddress(obj_end, ntams); 1445 } 1446 1447 // Mark the allocated-since-marking portion... 1448 HeapWord* top = hr->top(); 1449 if (ntams < top) { 1450 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1451 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1452 1453 // Note: if we're looking at the last region in heap - top 1454 // could be actually just beyond the end of the heap; end_idx 1455 // will then correspond to a (non-existent) card that is also 1456 // just beyond the heap. 1457 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1458 // end of object is not card aligned - increment to cover 1459 // all the cards spanned by the object 1460 end_idx += 1; 1461 } 1462 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1463 1464 // This definitely means the region has live objects. 1465 set_bit_for_region(hr); 1466 } 1467 1468 // Update the live region bitmap. 
1469 if (marked_bytes > 0) { 1470 set_bit_for_region(hr); 1471 } 1472 1473 // Set the marked bytes for the current region so that 1474 // it can be queried by a calling verificiation routine 1475 _region_marked_bytes = marked_bytes; 1476 1477 return false; 1478 } 1479 1480 size_t region_marked_bytes() const { return _region_marked_bytes; } 1481 }; 1482 1483 // Heap region closure used for verifying the counting data 1484 // that was accumulated concurrently and aggregated during 1485 // the remark pause. This closure is applied to the heap 1486 // regions during the STW cleanup pause. 1487 1488 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { 1489 G1CollectedHeap* _g1h; 1490 ConcurrentMark* _cm; 1491 CalcLiveObjectsClosure _calc_cl; 1492 BitMap* _region_bm; // Region BM to be verified 1493 BitMap* _card_bm; // Card BM to be verified 1494 bool _verbose; // verbose output? 1495 1496 BitMap* _exp_region_bm; // Expected Region BM values 1497 BitMap* _exp_card_bm; // Expected card BM values 1498 1499 int _failures; 1500 1501 public: 1502 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h, 1503 BitMap* region_bm, 1504 BitMap* card_bm, 1505 BitMap* exp_region_bm, 1506 BitMap* exp_card_bm, 1507 bool verbose) : 1508 _g1h(g1h), _cm(g1h->concurrent_mark()), 1509 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm), 1510 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose), 1511 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), 1512 _failures(0) { } 1513 1514 int failures() const { return _failures; } 1515 1516 bool doHeapRegion(HeapRegion* hr) { 1517 if (hr->continuesHumongous()) { 1518 // We will ignore these here and process them when their 1519 // associated "starts humongous" region is processed (see 1520 // set_bit_for_heap_region()). Note that we cannot rely on their 1521 // associated "starts humongous" region to have their bit set to 1522 // 1 since, due to the region chunking in the parallel region 1523 // iteration, a "continues humongous" region might be visited 1524 // before its associated "starts humongous". 1525 return false; 1526 } 1527 1528 int failures = 0; 1529 1530 // Call the CalcLiveObjectsClosure to walk the marking bitmap for 1531 // this region and set the corresponding bits in the expected region 1532 // and card bitmaps. 1533 bool res = _calc_cl.doHeapRegion(hr); 1534 assert(res == false, "should be continuing"); 1535 1536 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL), 1537 Mutex::_no_safepoint_check_flag); 1538 1539 // Verify the marked bytes for this region. 1540 size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); 1541 size_t act_marked_bytes = hr->next_marked_bytes(); 1542 1543 // We're not OK if expected marked bytes > actual marked bytes. It means 1544 // we have missed accounting some objects during the actual marking. 1545 if (exp_marked_bytes > act_marked_bytes) { 1546 if (_verbose) { 1547 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: " 1548 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT, 1549 hr->hrs_index(), exp_marked_bytes, act_marked_bytes); 1550 } 1551 failures += 1; 1552 } 1553 1554 // Verify the bit, for this region, in the actual and expected 1555 // (which was just calculated) region bit maps. 1556 // We're not OK if the bit in the calculated expected region 1557 // bitmap is set and the bit in the actual region bitmap is not. 
1558 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1559 1560 bool expected = _exp_region_bm->at(index); 1561 bool actual = _region_bm->at(index); 1562 if (expected && !actual) { 1563 if (_verbose) { 1564 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1565 "expected: %s, actual: %s", 1566 hr->hrs_index(), 1567 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1568 } 1569 failures += 1; 1570 } 1571 1572 // Verify that the card bit maps for the cards spanned by the current 1573 // region match. We have an error if we have a set bit in the expected 1574 // bit map and the corresponding bit in the actual bitmap is not set. 1575 1576 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1577 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1578 1579 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1580 expected = _exp_card_bm->at(i); 1581 actual = _card_bm->at(i); 1582 1583 if (expected && !actual) { 1584 if (_verbose) { 1585 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1586 "expected: %s, actual: %s", 1587 hr->hrs_index(), i, 1588 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1589 } 1590 failures += 1; 1591 } 1592 } 1593 1594 if (failures > 0 && _verbose) { 1595 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1596 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1597 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(), 1598 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1599 } 1600 1601 _failures += failures; 1602 1603 // We could stop iteration over the heap when we 1604 // find the first violating region by returning true. 1605 return false; 1606 } 1607 }; 1608 1609 1610 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1611 protected: 1612 G1CollectedHeap* _g1h; 1613 ConcurrentMark* _cm; 1614 BitMap* _actual_region_bm; 1615 BitMap* _actual_card_bm; 1616 1617 uint _n_workers; 1618 1619 BitMap* _expected_region_bm; 1620 BitMap* _expected_card_bm; 1621 1622 int _failures; 1623 bool _verbose; 1624 1625 public: 1626 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1627 BitMap* region_bm, BitMap* card_bm, 1628 BitMap* expected_region_bm, BitMap* expected_card_bm) 1629 : AbstractGangTask("G1 verify final counting"), 1630 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1631 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1632 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1633 _failures(0), _verbose(false), 1634 _n_workers(0) { 1635 assert(VerifyDuringGC, "don't call this otherwise"); 1636 1637 // Use the value already set as the number of active threads 1638 // in the call to run_task(). 
1639 if (G1CollectedHeap::use_parallel_gc_threads()) { 1640 assert( _g1h->workers()->active_workers() > 0, 1641 "Should have been previously set"); 1642 _n_workers = _g1h->workers()->active_workers(); 1643 } else { 1644 _n_workers = 1; 1645 } 1646 1647 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1648 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1649 1650 _verbose = _cm->verbose_medium(); 1651 } 1652 1653 void work(uint worker_id) { 1654 assert(worker_id < _n_workers, "invariant"); 1655 1656 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1657 _actual_region_bm, _actual_card_bm, 1658 _expected_region_bm, 1659 _expected_card_bm, 1660 _verbose); 1661 1662 if (G1CollectedHeap::use_parallel_gc_threads()) { 1663 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1664 worker_id, 1665 _n_workers, 1666 HeapRegion::VerifyCountClaimValue); 1667 } else { 1668 _g1h->heap_region_iterate(&verify_cl); 1669 } 1670 1671 Atomic::add(verify_cl.failures(), &_failures); 1672 } 1673 1674 int failures() const { return _failures; } 1675 }; 1676 1677 // Closure that finalizes the liveness counting data. 1678 // Used during the cleanup pause. 1679 // Sets the bits corresponding to the interval [NTAMS, top] 1680 // (which contains the implicitly live objects) in the 1681 // card liveness bitmap. Also sets the bit for each region, 1682 // containing live data, in the region liveness bitmap. 1683 1684 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1685 public: 1686 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1687 BitMap* region_bm, 1688 BitMap* card_bm) : 1689 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1690 1691 bool doHeapRegion(HeapRegion* hr) { 1692 1693 if (hr->continuesHumongous()) { 1694 // We will ignore these here and process them when their 1695 // associated "starts humongous" region is processed (see 1696 // set_bit_for_heap_region()). Note that we cannot rely on their 1697 // associated "starts humongous" region to have their bit set to 1698 // 1 since, due to the region chunking in the parallel region 1699 // iteration, a "continues humongous" region might be visited 1700 // before its associated "starts humongous". 1701 return false; 1702 } 1703 1704 HeapWord* ntams = hr->next_top_at_mark_start(); 1705 HeapWord* top = hr->top(); 1706 1707 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1708 1709 // Mark the allocated-since-marking portion... 1710 if (ntams < top) { 1711 // This definitely means the region has live objects. 1712 set_bit_for_region(hr); 1713 1714 // Now set the bits in the card bitmap for [ntams, top) 1715 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1716 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1717 1718 // Note: if we're looking at the last region in heap - top 1719 // could be actually just beyond the end of the heap; end_idx 1720 // will then correspond to a (non-existent) card that is also 1721 // just beyond the heap. 
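// In that case is_in_g1_reserved(top) below is false and end_idx is left as it is.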
1722 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1723 // end of object is not card aligned - increment to cover 1724 // all the cards spanned by the object 1725 end_idx += 1; 1726 } 1727 1728 assert(end_idx <= _card_bm->size(), 1729 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1730 end_idx, _card_bm->size())); 1731 assert(start_idx < _card_bm->size(), 1732 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1733 start_idx, _card_bm->size())); 1734 1735 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1736 } 1737 1738 // Set the bit for the region if it contains live data 1739 if (hr->next_marked_bytes() > 0) { 1740 set_bit_for_region(hr); 1741 } 1742 1743 return false; 1744 } 1745 }; 1746 1747 class G1ParFinalCountTask: public AbstractGangTask { 1748 protected: 1749 G1CollectedHeap* _g1h; 1750 ConcurrentMark* _cm; 1751 BitMap* _actual_region_bm; 1752 BitMap* _actual_card_bm; 1753 1754 uint _n_workers; 1755 1756 public: 1757 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1758 : AbstractGangTask("G1 final counting"), 1759 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1760 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1761 _n_workers(0) { 1762 // Use the value already set as the number of active threads 1763 // in the call to run_task(). 1764 if (G1CollectedHeap::use_parallel_gc_threads()) { 1765 assert( _g1h->workers()->active_workers() > 0, 1766 "Should have been previously set"); 1767 _n_workers = _g1h->workers()->active_workers(); 1768 } else { 1769 _n_workers = 1; 1770 } 1771 } 1772 1773 void work(uint worker_id) { 1774 assert(worker_id < _n_workers, "invariant"); 1775 1776 FinalCountDataUpdateClosure final_update_cl(_g1h, 1777 _actual_region_bm, 1778 _actual_card_bm); 1779 1780 if (G1CollectedHeap::use_parallel_gc_threads()) { 1781 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1782 worker_id, 1783 _n_workers, 1784 HeapRegion::FinalCountClaimValue); 1785 } else { 1786 _g1h->heap_region_iterate(&final_update_cl); 1787 } 1788 } 1789 }; 1790 1791 class G1ParNoteEndTask; 1792 1793 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1794 G1CollectedHeap* _g1; 1795 int _worker_num; 1796 size_t _max_live_bytes; 1797 uint _regions_claimed; 1798 size_t _freed_bytes; 1799 FreeRegionList* _local_cleanup_list; 1800 OldRegionSet* _old_proxy_set; 1801 HumongousRegionSet* _humongous_proxy_set; 1802 HRRSCleanupTask* _hrrs_cleanup_task; 1803 double _claimed_region_time; 1804 double _max_region_time; 1805 1806 public: 1807 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1808 int worker_num, 1809 FreeRegionList* local_cleanup_list, 1810 OldRegionSet* old_proxy_set, 1811 HumongousRegionSet* humongous_proxy_set, 1812 HRRSCleanupTask* hrrs_cleanup_task) : 1813 _g1(g1), _worker_num(worker_num), 1814 _max_live_bytes(0), _regions_claimed(0), 1815 _freed_bytes(0), 1816 _claimed_region_time(0.0), _max_region_time(0.0), 1817 _local_cleanup_list(local_cleanup_list), 1818 _old_proxy_set(old_proxy_set), 1819 _humongous_proxy_set(humongous_proxy_set), 1820 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1821 1822 size_t freed_bytes() { return _freed_bytes; } 1823 1824 bool doHeapRegion(HeapRegion *hr) { 1825 if (hr->continuesHumongous()) { 1826 return false; 1827 } 1828 // We use a claim value of zero here because all regions 1829 // were claimed with value 1 in the FinalCount task. 
1830 _g1->reset_gc_time_stamps(hr); 1831 double start = os::elapsedTime(); 1832 _regions_claimed++; 1833 hr->note_end_of_marking(); 1834 _max_live_bytes += hr->max_live_bytes(); 1835 _g1->free_region_if_empty(hr, 1836 &_freed_bytes, 1837 _local_cleanup_list, 1838 _old_proxy_set, 1839 _humongous_proxy_set, 1840 _hrrs_cleanup_task, 1841 true /* par */); 1842 double region_time = (os::elapsedTime() - start); 1843 _claimed_region_time += region_time; 1844 if (region_time > _max_region_time) { 1845 _max_region_time = region_time; 1846 } 1847 return false; 1848 } 1849 1850 size_t max_live_bytes() { return _max_live_bytes; } 1851 uint regions_claimed() { return _regions_claimed; } 1852 double claimed_region_time_sec() { return _claimed_region_time; } 1853 double max_region_time_sec() { return _max_region_time; } 1854 }; 1855 1856 class G1ParNoteEndTask: public AbstractGangTask { 1857 friend class G1NoteEndOfConcMarkClosure; 1858 1859 protected: 1860 G1CollectedHeap* _g1h; 1861 size_t _max_live_bytes; 1862 size_t _freed_bytes; 1863 FreeRegionList* _cleanup_list; 1864 1865 public: 1866 G1ParNoteEndTask(G1CollectedHeap* g1h, 1867 FreeRegionList* cleanup_list) : 1868 AbstractGangTask("G1 note end"), _g1h(g1h), 1869 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1870 1871 void work(uint worker_id) { 1872 double start = os::elapsedTime(); 1873 FreeRegionList local_cleanup_list("Local Cleanup List"); 1874 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1875 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1876 HRRSCleanupTask hrrs_cleanup_task; 1877 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1878 &old_proxy_set, 1879 &humongous_proxy_set, 1880 &hrrs_cleanup_task); 1881 if (G1CollectedHeap::use_parallel_gc_threads()) { 1882 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1883 _g1h->workers()->active_workers(), 1884 HeapRegion::NoteEndClaimValue); 1885 } else { 1886 _g1h->heap_region_iterate(&g1_note_end); 1887 } 1888 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1889 1890 // Now update the lists 1891 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1892 NULL /* free_list */, 1893 &old_proxy_set, 1894 &humongous_proxy_set, 1895 true /* par */); 1896 { 1897 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1898 _max_live_bytes += g1_note_end.max_live_bytes(); 1899 _freed_bytes += g1_note_end.freed_bytes(); 1900 1901 // If we iterate over the global cleanup list at the end of 1902 // cleanup to do this printing we will not guarantee to only 1903 // generate output for the newly-reclaimed regions (the list 1904 // might not be empty at the beginning of cleanup; we might 1905 // still be working on its previous contents). So we do the 1906 // printing here, before we append the new regions to the global 1907 // cleanup list. 
1908 1909 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1910 if (hr_printer->is_active()) { 1911 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1912 while (iter.more_available()) { 1913 HeapRegion* hr = iter.get_next(); 1914 hr_printer->cleanup(hr); 1915 } 1916 } 1917 1918 _cleanup_list->add_as_tail(&local_cleanup_list); 1919 assert(local_cleanup_list.is_empty(), "post-condition"); 1920 1921 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1922 } 1923 } 1924 size_t max_live_bytes() { return _max_live_bytes; } 1925 size_t freed_bytes() { return _freed_bytes; } 1926 }; 1927 1928 class G1ParScrubRemSetTask: public AbstractGangTask { 1929 protected: 1930 G1RemSet* _g1rs; 1931 BitMap* _region_bm; 1932 BitMap* _card_bm; 1933 public: 1934 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1935 BitMap* region_bm, BitMap* card_bm) : 1936 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1937 _region_bm(region_bm), _card_bm(card_bm) { } 1938 1939 void work(uint worker_id) { 1940 if (G1CollectedHeap::use_parallel_gc_threads()) { 1941 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1942 HeapRegion::ScrubRemSetClaimValue); 1943 } else { 1944 _g1rs->scrub(_region_bm, _card_bm); 1945 } 1946 } 1947 1948 }; 1949 1950 void ConcurrentMark::cleanup() { 1951 // world is stopped at this checkpoint 1952 assert(SafepointSynchronize::is_at_safepoint(), 1953 "world should be stopped"); 1954 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1955 1956 // If a full collection has happened, we shouldn't do this. 1957 if (has_aborted()) { 1958 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1959 return; 1960 } 1961 1962 HRSPhaseSetter x(HRSPhaseCleanup); 1963 g1h->verify_region_sets_optional(); 1964 1965 if (VerifyDuringGC) { 1966 HandleMark hm; // handle scope 1967 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1968 Universe::heap()->prepare_for_verify(); 1969 Universe::verify(/* silent */ false, 1970 /* option */ VerifyOption_G1UsePrevMarking); 1971 } 1972 1973 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1974 g1p->record_concurrent_mark_cleanup_start(); 1975 1976 double start = os::elapsedTime(); 1977 1978 HeapRegionRemSet::reset_for_cleanup_tasks(); 1979 1980 uint n_workers; 1981 1982 // Do counting once more with the world stopped for good measure. 1983 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1984 1985 if (G1CollectedHeap::use_parallel_gc_threads()) { 1986 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1987 "sanity check"); 1988 1989 g1h->set_par_threads(); 1990 n_workers = g1h->n_par_threads(); 1991 assert(g1h->n_par_threads() == n_workers, 1992 "Should not have been reset"); 1993 g1h->workers()->run_task(&g1_par_count_task); 1994 // Done with the parallel phase so reset to 0. 1995 g1h->set_par_threads(0); 1996 1997 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1998 "sanity check"); 1999 } else { 2000 n_workers = 1; 2001 g1_par_count_task.work(0); 2002 } 2003 2004 if (VerifyDuringGC) { 2005 // Verify that the counting data accumulated during marking matches 2006 // that calculated by walking the marking bitmap. 
2007 2008 // Bitmaps to hold expected values 2009 BitMap expected_region_bm(_region_bm.size(), false); 2010 BitMap expected_card_bm(_card_bm.size(), false); 2011 2012 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2013 &_region_bm, 2014 &_card_bm, 2015 &expected_region_bm, 2016 &expected_card_bm); 2017 2018 if (G1CollectedHeap::use_parallel_gc_threads()) { 2019 g1h->set_par_threads((int)n_workers); 2020 g1h->workers()->run_task(&g1_par_verify_task); 2021 // Done with the parallel phase so reset to 0. 2022 g1h->set_par_threads(0); 2023 2024 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2025 "sanity check"); 2026 } else { 2027 g1_par_verify_task.work(0); 2028 } 2029 2030 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2031 } 2032 2033 size_t start_used_bytes = g1h->used(); 2034 g1h->set_marking_complete(); 2035 2036 double count_end = os::elapsedTime(); 2037 double this_final_counting_time = (count_end - start); 2038 _total_counting_time += this_final_counting_time; 2039 2040 if (G1PrintRegionLivenessInfo) { 2041 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2042 _g1h->heap_region_iterate(&cl); 2043 } 2044 2045 // Install newly created mark bitMap as "prev". 2046 swapMarkBitMaps(); 2047 2048 g1h->reset_gc_time_stamp(); 2049 2050 // Note end of marking in all heap regions. 2051 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2052 if (G1CollectedHeap::use_parallel_gc_threads()) { 2053 g1h->set_par_threads((int)n_workers); 2054 g1h->workers()->run_task(&g1_par_note_end_task); 2055 g1h->set_par_threads(0); 2056 2057 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2058 "sanity check"); 2059 } else { 2060 g1_par_note_end_task.work(0); 2061 } 2062 g1h->check_gc_time_stamps(); 2063 2064 if (!cleanup_list_is_empty()) { 2065 // The cleanup list is not empty, so we'll have to process it 2066 // concurrently. Notify anyone else that might be wanting free 2067 // regions that there will be more free regions coming soon. 2068 g1h->set_free_regions_coming(); 2069 } 2070 2071 // call below, since it affects the metric by which we sort the heap 2072 // regions. 2073 if (G1ScrubRemSets) { 2074 double rs_scrub_start = os::elapsedTime(); 2075 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2076 if (G1CollectedHeap::use_parallel_gc_threads()) { 2077 g1h->set_par_threads((int)n_workers); 2078 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2079 g1h->set_par_threads(0); 2080 2081 assert(g1h->check_heap_region_claim_values( 2082 HeapRegion::ScrubRemSetClaimValue), 2083 "sanity check"); 2084 } else { 2085 g1_par_scrub_rs_task.work(0); 2086 } 2087 2088 double rs_scrub_end = os::elapsedTime(); 2089 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2090 _total_rs_scrub_time += this_rs_scrub_time; 2091 } 2092 2093 // this will also free any regions totally full of garbage objects, 2094 // and sort the regions. 2095 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2096 2097 // Statistics. 2098 double end = os::elapsedTime(); 2099 _cleanup_times.add((end - start) * 1000.0); 2100 2101 if (G1Log::fine()) { 2102 g1h->print_size_transition(gclog_or_tty, 2103 start_used_bytes, 2104 g1h->used(), 2105 g1h->capacity()); 2106 } 2107 2108 // Clean up will have freed any regions completely full of garbage. 2109 // Update the soft reference policy with the new heap occupancy. 
2110 Universe::update_heap_info_at_gc();
2111
2112 // We need to make this be a "collection" so any collection pause that
2113 // races with it goes around and waits for completeCleanup to finish.
2114 g1h->increment_total_collections();
2115
2116 // We reclaimed old regions so we should calculate the sizes to make
2117 // sure we update the old gen/space data.
2118 g1h->g1mm()->update_sizes();
2119
2120 if (VerifyDuringGC) {
2121 HandleMark hm; // handle scope
2122 gclog_or_tty->print(" VerifyDuringGC:(after)");
2123 Universe::heap()->prepare_for_verify();
2124 Universe::verify(/* silent */ false,
2125 /* option */ VerifyOption_G1UsePrevMarking);
2126 }
2127
2128 g1h->verify_region_sets_optional();
2129 }
2130
2131 void ConcurrentMark::completeCleanup() {
2132 if (has_aborted()) return;
2133
2134 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2135
2136 _cleanup_list.verify_optional();
2137 FreeRegionList tmp_free_list("Tmp Free List");
2138
2139 if (G1ConcRegionFreeingVerbose) {
2140 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2141 "cleanup list has %u entries",
2142 _cleanup_list.length());
2143 }
2144
2145 // No one else should be accessing the _cleanup_list at this point,
2146 // so it's not necessary to take any locks
2147 while (!_cleanup_list.is_empty()) {
2148 HeapRegion* hr = _cleanup_list.remove_head();
2149 assert(hr != NULL, "the list was not empty");
2150 hr->par_clear();
2151 tmp_free_list.add_as_tail(hr);
2152
2153 // Instead of adding one region at a time to the secondary_free_list,
2154 // we accumulate them in the local list and move them a few at a
2155 // time. This also cuts down on the number of notify_all() calls
2156 // we do during this process. We'll also append the local list when
2157 // _cleanup_list is empty (which means we just removed the last
2158 // region from the _cleanup_list).
2159 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2160 _cleanup_list.is_empty()) {
2161 if (G1ConcRegionFreeingVerbose) {
2162 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2163 "appending %u entries to the secondary_free_list, "
2164 "cleanup list still has %u entries",
2165 tmp_free_list.length(),
2166 _cleanup_list.length());
2167 }
2168
2169 {
2170 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2171 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2172 SecondaryFreeList_lock->notify_all();
2173 }
2174
2175 if (G1StressConcRegionFreeing) {
2176 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2177 os::sleep(Thread::current(), (jlong) 1, false);
2178 }
2179 }
2180 }
2181 }
2182 assert(tmp_free_list.is_empty(), "post-condition");
2183 }
2184
2185 // Supporting Object and Oop closures for reference discovery
2186 // and processing during marking
2187
2188 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2189 HeapWord* addr = (HeapWord*)obj;
2190 return addr != NULL &&
2191 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2192 }
2193
2194 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2195 // Uses the CMTask associated with a worker thread (for serial reference
2196 // processing the CMTask for worker 0 is used) to preserve (mark) and
2197 // trace referent objects.
2198 //
2199 // Using the CMTask and embedded local queues avoids having the worker
2200 // threads operating on the global mark stack. This reduces the risk
2201 // of overflowing the stack - which we would rather avoid at this late
2202 // stage.
Also using the tasks' local queues removes the potential 2203 // of the workers interfering with each other that could occur if 2204 // operating on the global stack. 2205 2206 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2207 ConcurrentMark* _cm; 2208 CMTask* _task; 2209 int _ref_counter_limit; 2210 int _ref_counter; 2211 bool _is_serial; 2212 public: 2213 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2214 _cm(cm), _task(task), _is_serial(is_serial), 2215 _ref_counter_limit(G1RefProcDrainInterval) { 2216 assert(_ref_counter_limit > 0, "sanity"); 2217 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2218 _ref_counter = _ref_counter_limit; 2219 } 2220 2221 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2222 virtual void do_oop( oop* p) { do_oop_work(p); } 2223 2224 template <class T> void do_oop_work(T* p) { 2225 if (!_cm->has_overflown()) { 2226 oop obj = oopDesc::load_decode_heap_oop(p); 2227 if (_cm->verbose_high()) { 2228 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2229 "*"PTR_FORMAT" = "PTR_FORMAT, 2230 _task->worker_id(), p, (void*) obj); 2231 } 2232 2233 _task->deal_with_reference(obj); 2234 _ref_counter--; 2235 2236 if (_ref_counter == 0) { 2237 // We have dealt with _ref_counter_limit references, pushing them 2238 // and objects reachable from them on to the local stack (and 2239 // possibly the global stack). Call CMTask::do_marking_step() to 2240 // process these entries. 2241 // 2242 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2243 // there's nothing more to do (i.e. we're done with the entries that 2244 // were pushed as a result of the CMTask::deal_with_reference() calls 2245 // above) or we overflow. 2246 // 2247 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2248 // flag while there may still be some work to do. (See the comment at 2249 // the beginning of CMTask::do_marking_step() for those conditions - 2250 // one of which is reaching the specified time target.) It is only 2251 // when CMTask::do_marking_step() returns without setting the 2252 // has_aborted() flag that the marking step has completed. 2253 do { 2254 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2255 _task->do_marking_step(mark_step_duration_ms, 2256 false /* do_stealing */, 2257 false /* do_termination */, 2258 _is_serial /* is_serial */); 2259 } while (_task->has_aborted() && !_cm->has_overflown()); 2260 _ref_counter = _ref_counter_limit; 2261 } 2262 } else { 2263 if (_cm->verbose_high()) { 2264 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2265 } 2266 } 2267 } 2268 }; 2269 2270 // 'Drain' oop closure used by both serial and parallel reference processing. 2271 // Uses the CMTask associated with a given worker thread (for serial 2272 // reference processing the CMtask for worker 0 is used). Calls the 2273 // do_marking_step routine, with an unbelievably large timeout value, 2274 // to drain the marking data structures of the remaining entries 2275 // added by the 'keep alive' oop closure above. 
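// The closure keeps re-invoking do_marking_step() until the step completes
// without setting the has_aborted() flag, or the global marking stack overflows.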
2276
2277 class G1CMDrainMarkingStackClosure: public VoidClosure {
2278 ConcurrentMark* _cm;
2279 CMTask* _task;
2280 bool _is_serial;
2281 public:
2282 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2283 _cm(cm), _task(task), _is_serial(is_serial) {
2284 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2285 }
2286
2287 void do_void() {
2288 do {
2289 if (_cm->verbose_high()) {
2290 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2291 _task->worker_id(), BOOL_TO_STR(_is_serial));
2292 }
2293
2294 // We call CMTask::do_marking_step() to completely drain the local
2295 // and global marking stacks of entries pushed by the 'keep alive'
2296 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2297 //
2298 // CMTask::do_marking_step() is called in a loop, which we'll exit
2299 // if there's nothing more to do (i.e. we've completely drained the
2300 // entries that were pushed as a result of applying the 'keep alive'
2301 // closure to the entries on the discovered ref lists) or we overflow
2302 // the global marking stack.
2303 //
2304 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2305 // flag while there may still be some work to do. (See the comment at
2306 // the beginning of CMTask::do_marking_step() for those conditions -
2307 // one of which is reaching the specified time target.) It is only
2308 // when CMTask::do_marking_step() returns without setting the
2309 // has_aborted() flag that the marking step has completed.
2310
2311 // It only makes sense to allow stealing in CMTask::do_marking_step()
2312 // if this closure is being instantiated for parallel reference
2313 // processing.
2314
2315 _task->do_marking_step(1000000000.0 /* something very large */,
2316 !_is_serial /* do_stealing */,
2317 true /* do_termination */,
2318 _is_serial /* is_serial */);
2319 } while (_task->has_aborted() && !_cm->has_overflown());
2320 }
2321 };
2322
2323 // Implementation of AbstractRefProcTaskExecutor for parallel
2324 // reference processing at the end of G1 concurrent marking
2325
2326 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2327 private:
2328 G1CollectedHeap* _g1h;
2329 ConcurrentMark* _cm;
2330 WorkGang* _workers;
2331 int _active_workers;
2332
2333 public:
2334 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2335 ConcurrentMark* cm,
2336 WorkGang* workers,
2337 int n_workers) :
2338 _g1h(g1h), _cm(cm),
2339 _workers(workers), _active_workers(n_workers) { }
2340
2341 // Executes the given task using concurrent marking worker threads.
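// Both overloads wrap the reference processor's task in an AbstractGangTask
// proxy (G1CMRefProcTaskProxy / G1CMRefEnqueueTaskProxy below) and run it on
// the work gang.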
2342 virtual void execute(ProcessTask& task); 2343 virtual void execute(EnqueueTask& task); 2344 }; 2345 2346 class G1CMRefProcTaskProxy: public AbstractGangTask { 2347 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2348 ProcessTask& _proc_task; 2349 G1CollectedHeap* _g1h; 2350 ConcurrentMark* _cm; 2351 2352 public: 2353 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2354 G1CollectedHeap* g1h, 2355 ConcurrentMark* cm) : 2356 AbstractGangTask("Process reference objects in parallel"), 2357 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2358 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2359 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2360 } 2361 2362 virtual void work(uint worker_id) { 2363 CMTask* task = _cm->task(worker_id); 2364 G1CMIsAliveClosure g1_is_alive(_g1h); 2365 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2366 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2367 2368 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2369 } 2370 }; 2371 2372 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2373 assert(_workers != NULL, "Need parallel worker threads."); 2374 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2375 2376 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2377 2378 // We need to reset the concurrency level before each 2379 // proxy task execution, so that the termination protocol 2380 // and overflow handling in CMTask::do_marking_step() knows 2381 // how many workers to wait for. 2382 _cm->set_concurrency(_active_workers); 2383 _g1h->set_par_threads(_active_workers); 2384 _workers->run_task(&proc_task_proxy); 2385 _g1h->set_par_threads(0); 2386 } 2387 2388 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2389 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2390 EnqueueTask& _enq_task; 2391 2392 public: 2393 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2394 AbstractGangTask("Enqueue reference objects in parallel"), 2395 _enq_task(enq_task) { } 2396 2397 virtual void work(uint worker_id) { 2398 _enq_task.work(worker_id); 2399 } 2400 }; 2401 2402 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2403 assert(_workers != NULL, "Need parallel worker threads."); 2404 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2405 2406 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2407 2408 // Not strictly necessary but... 2409 // 2410 // We need to reset the concurrency level before each 2411 // proxy task execution, so that the termination protocol 2412 // and overflow handling in CMTask::do_marking_step() knows 2413 // how many workers to wait for. 2414 _cm->set_concurrency(_active_workers); 2415 _g1h->set_par_threads(_active_workers); 2416 _workers->run_task(&enq_task_proxy); 2417 _g1h->set_par_threads(0); 2418 } 2419 2420 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2421 if (has_overflown()) { 2422 // Skip processing the discovered references if we have 2423 // overflown the global marking stack. Reference objects 2424 // only get discovered once so it is OK to not 2425 // de-populate the discovered reference lists. We could have, 2426 // but the only benefit would be that, when marking restarts, 2427 // less reference objects are discovered. 2428 return; 2429 } 2430 2431 ResourceMark rm; 2432 HandleMark hm; 2433 2434 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2435 2436 // Is alive closure. 
2437 G1CMIsAliveClosure g1_is_alive(g1h);
2438
2439 // Inner scope to exclude the cleaning of the string and symbol
2440 // tables from the displayed time.
2441 {
2442 if (G1Log::finer()) {
2443 gclog_or_tty->put(' ');
2444 }
2445 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2446
2447 ReferenceProcessor* rp = g1h->ref_processor_cm();
2448
2449 // See the comment in G1CollectedHeap::ref_processing_init()
2450 // about how reference processing currently works in G1.
2451
2452 // Set the soft reference policy
2453 rp->setup_policy(clear_all_soft_refs);
2454 assert(_markStack.isEmpty(), "mark stack should be empty");
2455
2456 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2457 // in serial reference processing. Note these closures are also
2458 // used for serially processing (by the current thread) the
2459 // JNI references during parallel reference processing.
2460 //
2461 // These closures do not need to synchronize with the worker
2462 // threads involved in parallel reference processing as these
2463 // instances are executed serially by the current thread (e.g.
2464 // reference processing is not multi-threaded and is thus
2465 // performed by the current thread instead of a gang worker).
2466 //
2467 // The gang tasks involved in parallel reference processing create
2468 // their own instances of these closures, which do their own
2469 // synchronization among themselves.
2470 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2471 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2472
2473 // We need at least one active thread. If reference processing
2474 // is not multi-threaded we use the current (VMThread) thread,
2475 // otherwise we use the work gang from the G1CollectedHeap and
2476 // we utilize all the worker threads we can.
2477 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2478 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2479 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2480
2481 // Parallel processing task executor.
2482 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2483 g1h->workers(), active_workers);
2484 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2485
2486 // Set the concurrency level. The phase was already set prior to
2487 // executing the remark task.
2488 set_concurrency(active_workers);
2489
2490 // Set the degree of MT processing here. If the discovery was done MT,
2491 // the number of threads involved during discovery could differ from
2492 // the number of active workers. This is OK as long as the discovered
2493 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2494 rp->set_active_mt_degree(active_workers);
2495
2496 // Process the weak references.
2497 rp->process_discovered_references(&g1_is_alive,
2498 &g1_keep_alive,
2499 &g1_drain_mark_stack,
2500 executor);
2501
2502 // The do_oop work routines of the keep_alive and drain_marking_stack
2503 // oop closures will set the has_overflown flag if we overflow the
2504 // global marking stack.
2505
2506 assert(_markStack.overflow() || _markStack.isEmpty(),
2507 "mark stack should be empty (unless it overflowed)");
2508
2509 if (_markStack.overflow()) {
2510 // This should have been done already when we tried to push an
2511 // entry on to the global mark stack. But let's do it again.
2512 set_has_overflown();
2513 }
2514
2515 assert(rp->num_q() == active_workers, "why not");
2516
2517 rp->enqueue_discovered_references(executor);
2518
2519 rp->verify_no_references_recorded();
2520 assert(!rp->discovery_enabled(), "Post condition");
2521 }
2522
2523 // Now clean up stale oops in StringTable
2524 StringTable::unlink(&g1_is_alive);
2525 // Clean up unreferenced symbols in symbol table.
2526 SymbolTable::unlink();
2527 }
2528
2529 void ConcurrentMark::swapMarkBitMaps() {
2530 CMBitMapRO* temp = _prevMarkBitMap;
2531 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2532 _nextMarkBitMap = (CMBitMap*) temp;
2533 }
2534
2535 class CMRemarkTask: public AbstractGangTask {
2536 private:
2537 ConcurrentMark* _cm;
2538 bool _is_serial;
2539 public:
2540 void work(uint worker_id) {
2541 // Since all available tasks are actually started, we should
2542 // only proceed if we're supposed to be active.
2543 if (worker_id < _cm->active_tasks()) {
2544 CMTask* task = _cm->task(worker_id);
2545 task->record_start_time();
2546 do {
2547 task->do_marking_step(1000000000.0 /* something very large */,
2548 true /* do_stealing */,
2549 true /* do_termination */,
2550 _is_serial);
2551 } while (task->has_aborted() && !_cm->has_overflown());
2552 // If we overflow, then we do not want to restart. We instead
2553 // want to abort remark and do concurrent marking again.
2554 task->record_end_time();
2555 }
2556 }
2557
2558 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2559 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2560 _cm->terminator()->reset_for_reuse(active_workers);
2561 }
2562 };
2563
2564 void ConcurrentMark::checkpointRootsFinalWork() {
2565 ResourceMark rm;
2566 HandleMark hm;
2567 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2568
2569 g1h->ensure_parsability(false);
2570
2571 if (G1CollectedHeap::use_parallel_gc_threads()) {
2572 G1CollectedHeap::StrongRootsScope srs(g1h);
2573 // this is remark, so we'll use up all active threads
2574 uint active_workers = g1h->workers()->active_workers();
2575 if (active_workers == 0) {
2576 assert(active_workers > 0, "Should have been set earlier");
2577 active_workers = (uint) ParallelGCThreads;
2578 g1h->workers()->set_active_workers(active_workers);
2579 }
2580 set_concurrency_and_phase(active_workers, false /* concurrent */);
2581 // Leave _parallel_marking_threads at its
2582 // value originally calculated in the ConcurrentMark
2583 // constructor and pass values of the active workers
2584 // through the gang in the task.
2585
2586 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2587 // We will start all available threads, even if we decide that the
2588 // active_workers will be fewer. The extra ones will just bail out
2589 // immediately.
2590 g1h->set_par_threads(active_workers);
2591 g1h->workers()->run_task(&remarkTask);
2592 g1h->set_par_threads(0);
2593 } else {
2594 G1CollectedHeap::StrongRootsScope srs(g1h);
2595 uint active_workers = 1;
2596 set_concurrency_and_phase(active_workers, false /* concurrent */);
2597
2598 // Note - if there's no work gang then the VMThread will be
2599 // the thread to execute the remark - serially. We have
2600 // to pass true for the is_serial parameter so that
2601 // CMTask::do_marking_step() doesn't enter the sync
2602 // barriers in the event of an overflow. Doing so will
2603 // cause an assert that the current thread is not a
2604 // concurrent GC thread.
2605 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2606 remarkTask.work(0); 2607 } 2608 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2609 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2610 2611 print_stats(); 2612 2613 #if VERIFY_OBJS_PROCESSED 2614 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2615 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2616 _scan_obj_cl.objs_processed, 2617 ThreadLocalObjQueue::objs_enqueued); 2618 guarantee(_scan_obj_cl.objs_processed == 2619 ThreadLocalObjQueue::objs_enqueued, 2620 "Different number of objs processed and enqueued."); 2621 } 2622 #endif 2623 } 2624 2625 #ifndef PRODUCT 2626 2627 class PrintReachableOopClosure: public OopClosure { 2628 private: 2629 G1CollectedHeap* _g1h; 2630 outputStream* _out; 2631 VerifyOption _vo; 2632 bool _all; 2633 2634 public: 2635 PrintReachableOopClosure(outputStream* out, 2636 VerifyOption vo, 2637 bool all) : 2638 _g1h(G1CollectedHeap::heap()), 2639 _out(out), _vo(vo), _all(all) { } 2640 2641 void do_oop(narrowOop* p) { do_oop_work(p); } 2642 void do_oop( oop* p) { do_oop_work(p); } 2643 2644 template <class T> void do_oop_work(T* p) { 2645 oop obj = oopDesc::load_decode_heap_oop(p); 2646 const char* str = NULL; 2647 const char* str2 = ""; 2648 2649 if (obj == NULL) { 2650 str = ""; 2651 } else if (!_g1h->is_in_g1_reserved(obj)) { 2652 str = " O"; 2653 } else { 2654 HeapRegion* hr = _g1h->heap_region_containing(obj); 2655 guarantee(hr != NULL, "invariant"); 2656 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2657 bool marked = _g1h->is_marked(obj, _vo); 2658 2659 if (over_tams) { 2660 str = " >"; 2661 if (marked) { 2662 str2 = " AND MARKED"; 2663 } 2664 } else if (marked) { 2665 str = " M"; 2666 } else { 2667 str = " NOT"; 2668 } 2669 } 2670 2671 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2672 p, (void*) obj, str, str2); 2673 } 2674 }; 2675 2676 class PrintReachableObjectClosure : public ObjectClosure { 2677 private: 2678 G1CollectedHeap* _g1h; 2679 outputStream* _out; 2680 VerifyOption _vo; 2681 bool _all; 2682 HeapRegion* _hr; 2683 2684 public: 2685 PrintReachableObjectClosure(outputStream* out, 2686 VerifyOption vo, 2687 bool all, 2688 HeapRegion* hr) : 2689 _g1h(G1CollectedHeap::heap()), 2690 _out(out), _vo(vo), _all(all), _hr(hr) { } 2691 2692 void do_object(oop o) { 2693 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2694 bool marked = _g1h->is_marked(o, _vo); 2695 bool print_it = _all || over_tams || marked; 2696 2697 if (print_it) { 2698 _out->print_cr(" "PTR_FORMAT"%s", 2699 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2700 PrintReachableOopClosure oopCl(_out, _vo, _all); 2701 o->oop_iterate_no_header(&oopCl); 2702 } 2703 } 2704 }; 2705 2706 class PrintReachableRegionClosure : public HeapRegionClosure { 2707 private: 2708 G1CollectedHeap* _g1h; 2709 outputStream* _out; 2710 VerifyOption _vo; 2711 bool _all; 2712 2713 public: 2714 bool doHeapRegion(HeapRegion* hr) { 2715 HeapWord* b = hr->bottom(); 2716 HeapWord* e = hr->end(); 2717 HeapWord* t = hr->top(); 2718 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2719 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2720 "TAMS: "PTR_FORMAT, b, e, t, p); 2721 _out->cr(); 2722 2723 HeapWord* from = b; 2724 HeapWord* to = t; 2725 2726 if (to > from) { 2727 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2728 _out->cr(); 2729 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2730 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2731 _out->cr(); 2732 } 2733 2734 return false; 2735 } 2736 2737 PrintReachableRegionClosure(outputStream* out, 2738 VerifyOption vo, 2739 bool all) : 2740 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2741 }; 2742 2743 void ConcurrentMark::print_reachable(const char* str, 2744 VerifyOption vo, 2745 bool all) { 2746 gclog_or_tty->cr(); 2747 gclog_or_tty->print_cr("== Doing heap dump... "); 2748 2749 if (G1PrintReachableBaseFile == NULL) { 2750 gclog_or_tty->print_cr(" #### error: no base file defined"); 2751 return; 2752 } 2753 2754 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2755 (JVM_MAXPATHLEN - 1)) { 2756 gclog_or_tty->print_cr(" #### error: file name too long"); 2757 return; 2758 } 2759 2760 char file_name[JVM_MAXPATHLEN]; 2761 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2762 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2763 2764 fileStream fout(file_name); 2765 if (!fout.is_open()) { 2766 gclog_or_tty->print_cr(" #### error: could not open file"); 2767 return; 2768 } 2769 2770 outputStream* out = &fout; 2771 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2772 out->cr(); 2773 2774 out->print_cr("--- ITERATING OVER REGIONS"); 2775 out->cr(); 2776 PrintReachableRegionClosure rcl(out, vo, all); 2777 _g1h->heap_region_iterate(&rcl); 2778 out->cr(); 2779 2780 gclog_or_tty->print_cr(" done"); 2781 gclog_or_tty->flush(); 2782 } 2783 2784 #endif // PRODUCT 2785 2786 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2787 // Note we are overriding the read-only view of the prev map here, via 2788 // the cast. 2789 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2790 } 2791 2792 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2793 _nextMarkBitMap->clearRange(mr); 2794 } 2795 2796 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2797 clearRangePrevBitmap(mr); 2798 clearRangeNextBitmap(mr); 2799 } 2800 2801 HeapRegion* 2802 ConcurrentMark::claim_region(uint worker_id) { 2803 // "checkpoint" the finger 2804 HeapWord* finger = _finger; 2805 2806 // _heap_end will not change underneath our feet; it only changes at 2807 // yield points. 2808 while (finger < _heap_end) { 2809 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2810 2811 // Note on how this code handles humongous regions. In the 2812 // normal case the finger will reach the start of a "starts 2813 // humongous" (SH) region. 
Its end will either be the end of the
2814 // last "continues humongous" (CH) region in the sequence, or the
2815 // standard end of the SH region (if the SH is the only region in
2816 // the sequence). That way claim_region() will skip over the CH
2817 // regions. However, there is a subtle race between a CM thread
2818 // executing this method and a mutator thread doing a humongous
2819 // object allocation. The two are not mutually exclusive as the CM
2820 // thread does not need to hold the Heap_lock when it gets
2821 // here. So there is a chance that claim_region() will come across
2822 // a free region that's in the process of becoming a SH or a CH
2823 // region. In the former case, it will either
2824 // a) Miss the update to the region's end, in which case it will
2825 // visit every subsequent CH region, will find their bitmaps
2826 // empty, and do nothing, or
2827 // b) Will observe the update of the region's end (in which case
2828 // it will skip the subsequent CH regions).
2829 // If it comes across a region that suddenly becomes CH, the
2830 // scenario will be similar to b). So, the race between
2831 // claim_region() and a humongous object allocation might force us
2832 // to do a bit of unnecessary work (due to some unnecessary bitmap
2833 // iterations) but it should not introduce any correctness issues.
2834 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2835 HeapWord* bottom = curr_region->bottom();
2836 HeapWord* end = curr_region->end();
2837 HeapWord* limit = curr_region->next_top_at_mark_start();
2838
2839 if (verbose_low()) {
2840 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2841 "["PTR_FORMAT", "PTR_FORMAT"), "
2842 "limit = "PTR_FORMAT,
2843 worker_id, curr_region, bottom, end, limit);
2844 }
2845
2846 // Is the gap between reading the finger and doing the CAS too long?
2847 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2848 if (res == finger) {
2849 // we succeeded
2850
2851 // notice that _finger == end cannot be guaranteed here since
2852 // someone else might have moved the finger even further
2853 assert(_finger >= end, "the finger should have moved forward");
2854
2855 if (verbose_low()) {
2856 gclog_or_tty->print_cr("[%u] we were successful with region = "
2857 PTR_FORMAT, worker_id, curr_region);
2858 }
2859
2860 if (limit > bottom) {
2861 if (verbose_low()) {
2862 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2863 "returning it ", worker_id, curr_region);
2864 }
2865 return curr_region;
2866 } else {
2867 assert(limit == bottom,
2868 "the region limit should be at bottom");
2869 if (verbose_low()) {
2870 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2871 "returning NULL", worker_id, curr_region);
2872 }
2873 // we return NULL and the caller should try calling
2874 // claim_region() again.
2875 return NULL; 2876 } 2877 } else { 2878 assert(_finger > finger, "the finger should have moved forward"); 2879 if (verbose_low()) { 2880 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2881 "global finger = "PTR_FORMAT", " 2882 "our finger = "PTR_FORMAT, 2883 worker_id, _finger, finger); 2884 } 2885 2886 // read it again 2887 finger = _finger; 2888 } 2889 } 2890 2891 return NULL; 2892 } 2893 2894 #ifndef PRODUCT 2895 enum VerifyNoCSetOopsPhase { 2896 VerifyNoCSetOopsStack, 2897 VerifyNoCSetOopsQueues, 2898 VerifyNoCSetOopsSATBCompleted, 2899 VerifyNoCSetOopsSATBThread 2900 }; 2901 2902 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2903 private: 2904 G1CollectedHeap* _g1h; 2905 VerifyNoCSetOopsPhase _phase; 2906 int _info; 2907 2908 const char* phase_str() { 2909 switch (_phase) { 2910 case VerifyNoCSetOopsStack: return "Stack"; 2911 case VerifyNoCSetOopsQueues: return "Queue"; 2912 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2913 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2914 default: ShouldNotReachHere(); 2915 } 2916 return NULL; 2917 } 2918 2919 void do_object_work(oop obj) { 2920 guarantee(!_g1h->obj_in_cs(obj), 2921 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2922 (void*) obj, phase_str(), _info)); 2923 } 2924 2925 public: 2926 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2927 2928 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2929 _phase = phase; 2930 _info = info; 2931 } 2932 2933 virtual void do_oop(oop* p) { 2934 oop obj = oopDesc::load_decode_heap_oop(p); 2935 do_object_work(obj); 2936 } 2937 2938 virtual void do_oop(narrowOop* p) { 2939 // We should not come across narrow oops while scanning marking 2940 // stacks and SATB buffers. 2941 ShouldNotReachHere(); 2942 } 2943 2944 virtual void do_object(oop obj) { 2945 do_object_work(obj); 2946 } 2947 }; 2948 2949 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2950 bool verify_enqueued_buffers, 2951 bool verify_thread_buffers, 2952 bool verify_fingers) { 2953 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2954 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2955 return; 2956 } 2957 2958 VerifyNoCSetOopsClosure cl; 2959 2960 if (verify_stacks) { 2961 // Verify entries on the global mark stack 2962 cl.set_phase(VerifyNoCSetOopsStack); 2963 _markStack.oops_do(&cl); 2964 2965 // Verify entries on the task queues 2966 for (uint i = 0; i < _max_worker_id; i += 1) { 2967 cl.set_phase(VerifyNoCSetOopsQueues, i); 2968 CMTaskQueue* queue = _task_queues->queue(i); 2969 queue->oops_do(&cl); 2970 } 2971 } 2972 2973 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2974 2975 // Verify entries on the enqueued SATB buffers 2976 if (verify_enqueued_buffers) { 2977 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2978 satb_qs.iterate_completed_buffers_read_only(&cl); 2979 } 2980 2981 // Verify entries on the per-thread SATB buffers 2982 if (verify_thread_buffers) { 2983 cl.set_phase(VerifyNoCSetOopsSATBThread); 2984 satb_qs.iterate_thread_buffers_read_only(&cl); 2985 } 2986 2987 if (verify_fingers) { 2988 // Verify the global finger 2989 HeapWord* global_finger = finger(); 2990 if (global_finger != NULL && global_finger < _heap_end) { 2991 // The global finger always points to a heap region boundary. 
We 2992 // use heap_region_containing_raw() to get the containing region 2993 // given that the global finger could be pointing to a free region 2994 // which subsequently becomes continues humongous. If that 2995 // happens, heap_region_containing() will return the bottom of the 2996 // corresponding starts humongous region and the check below will 2997 // not hold any more. 2998 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2999 guarantee(global_finger == global_hr->bottom(), 3000 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3001 global_finger, HR_FORMAT_PARAMS(global_hr))); 3002 } 3003 3004 // Verify the task fingers 3005 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3006 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3007 CMTask* task = _tasks[i]; 3008 HeapWord* task_finger = task->finger(); 3009 if (task_finger != NULL && task_finger < _heap_end) { 3010 // See above note on the global finger verification. 3011 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3012 guarantee(task_finger == task_hr->bottom() || 3013 !task_hr->in_collection_set(), 3014 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3015 task_finger, HR_FORMAT_PARAMS(task_hr))); 3016 } 3017 } 3018 } 3019 } 3020 #endif // PRODUCT 3021 3022 // Aggregate the counting data that was constructed concurrently 3023 // with marking. 3024 class AggregateCountDataHRClosure: public HeapRegionClosure { 3025 G1CollectedHeap* _g1h; 3026 ConcurrentMark* _cm; 3027 CardTableModRefBS* _ct_bs; 3028 BitMap* _cm_card_bm; 3029 uint _max_worker_id; 3030 3031 public: 3032 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3033 BitMap* cm_card_bm, 3034 uint max_worker_id) : 3035 _g1h(g1h), _cm(g1h->concurrent_mark()), 3036 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3037 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3038 3039 bool doHeapRegion(HeapRegion* hr) { 3040 if (hr->continuesHumongous()) { 3041 // We will ignore these here and process them when their 3042 // associated "starts humongous" region is processed. 3043 // Note that we cannot rely on their associated 3044 // "starts humongous" region to have their bit set to 1 3045 // since, due to the region chunking in the parallel region 3046 // iteration, a "continues humongous" region might be visited 3047 // before its associated "starts humongous". 3048 return false; 3049 } 3050 3051 HeapWord* start = hr->bottom(); 3052 HeapWord* limit = hr->next_top_at_mark_start(); 3053 HeapWord* end = hr->end(); 3054 3055 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3056 err_msg("Preconditions not met - " 3057 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3058 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3059 start, limit, hr->top(), hr->end())); 3060 3061 assert(hr->next_marked_bytes() == 0, "Precondition"); 3062 3063 if (start == limit) { 3064 // NTAMS of this region has not been set so nothing to do. 3065 return false; 3066 } 3067 3068 // 'start' should be in the heap. 
3069 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3070 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3071 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3072
3073 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3074 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3075 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3076
3077 // If ntams is not card aligned then we bump the card bitmap index
3078 // for limit so that we get all the cards spanned by
3079 // the object ending at ntams.
3080 // Note: if this is the last region in the heap then ntams
3081 // could be actually just beyond the end of the heap;
3082 // limit_idx will then correspond to a (non-existent) card
3083 // that is also outside the heap.
3084 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3085 limit_idx += 1;
3086 }
3087
3088 assert(limit_idx <= end_idx, "or else use atomics");
3089
3090 // Aggregate the "stripe" in the count data associated with hr.
3091 uint hrs_index = hr->hrs_index();
3092 size_t marked_bytes = 0;
3093
3094 for (uint i = 0; i < _max_worker_id; i += 1) {
3095 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3096 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3097
3098 // Fetch the marked_bytes in this region for task i and
3099 // add it to the running total for this region.
3100 marked_bytes += marked_bytes_array[hrs_index];
3101
3102 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3103 // into the global card bitmap.
3104 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3105
3106 while (scan_idx < limit_idx) {
3107 assert(task_card_bm->at(scan_idx) == true, "should be");
3108 _cm_card_bm->set_bit(scan_idx);
3109 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3110
3111 // BitMap::get_next_one_offset() can handle the case when
3112 // its left_offset parameter is greater than its right_offset
3113 // parameter. It does, however, have an early exit if
3114 // left_offset == right_offset. So let's limit the value
3115 // passed in for left offset here.
3116 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3117 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3118 }
3119 }
3120
3121 // Update the marked bytes for this region.
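// marked_bytes is the sum of the per-worker totals for this region's stripe of the counting data.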
3122 hr->add_to_marked_bytes(marked_bytes); 3123 3124 // Next heap region 3125 return false; 3126 } 3127 }; 3128 3129 class G1AggregateCountDataTask: public AbstractGangTask { 3130 protected: 3131 G1CollectedHeap* _g1h; 3132 ConcurrentMark* _cm; 3133 BitMap* _cm_card_bm; 3134 uint _max_worker_id; 3135 int _active_workers; 3136 3137 public: 3138 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3139 ConcurrentMark* cm, 3140 BitMap* cm_card_bm, 3141 uint max_worker_id, 3142 int n_workers) : 3143 AbstractGangTask("Count Aggregation"), 3144 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3145 _max_worker_id(max_worker_id), 3146 _active_workers(n_workers) { } 3147 3148 void work(uint worker_id) { 3149 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3150 3151 if (G1CollectedHeap::use_parallel_gc_threads()) { 3152 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3153 _active_workers, 3154 HeapRegion::AggregateCountClaimValue); 3155 } else { 3156 _g1h->heap_region_iterate(&cl); 3157 } 3158 } 3159 }; 3160 3161 3162 void ConcurrentMark::aggregate_count_data() { 3163 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3164 _g1h->workers()->active_workers() : 3165 1); 3166 3167 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3168 _max_worker_id, n_workers); 3169 3170 if (G1CollectedHeap::use_parallel_gc_threads()) { 3171 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3172 "sanity check"); 3173 _g1h->set_par_threads(n_workers); 3174 _g1h->workers()->run_task(&g1_par_agg_task); 3175 _g1h->set_par_threads(0); 3176 3177 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3178 "sanity check"); 3179 _g1h->reset_heap_region_claim_values(); 3180 } else { 3181 g1_par_agg_task.work(0); 3182 } 3183 } 3184 3185 // Clear the per-worker arrays used to store the per-region counting data 3186 void ConcurrentMark::clear_all_count_data() { 3187 // Clear the global card bitmap - it will be filled during 3188 // liveness count aggregation (during remark) and the 3189 // final counting task. 3190 _card_bm.clear(); 3191 3192 // Clear the global region bitmap - it will be filled as part 3193 // of the final counting task. 
3194 _region_bm.clear(); 3195 3196 uint max_regions = _g1h->max_regions(); 3197 assert(_max_worker_id > 0, "uninitialized"); 3198 3199 for (uint i = 0; i < _max_worker_id; i += 1) { 3200 BitMap* task_card_bm = count_card_bitmap_for(i); 3201 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3202 3203 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3204 assert(marked_bytes_array != NULL, "uninitialized"); 3205 3206 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3207 task_card_bm->clear(); 3208 } 3209 } 3210 3211 void ConcurrentMark::print_stats() { 3212 if (verbose_stats()) { 3213 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3214 for (size_t i = 0; i < _active_tasks; ++i) { 3215 _tasks[i]->print_stats(); 3216 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3217 } 3218 } 3219 } 3220 3221 // abandon current marking iteration due to a Full GC 3222 void ConcurrentMark::abort() { 3223 // Clear all marks to force marking thread to do nothing 3224 _nextMarkBitMap->clearAll(); 3225 // Clear the liveness counting data 3226 clear_all_count_data(); 3227 // Empty mark stack 3228 reset_marking_state(); 3229 for (uint i = 0; i < _max_worker_id; ++i) { 3230 _tasks[i]->clear_region_fields(); 3231 } 3232 _has_aborted = true; 3233 3234 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3235 satb_mq_set.abandon_partial_marking(); 3236 // This can be called either during or outside marking, we'll read 3237 // the expected_active value from the SATB queue set. 3238 satb_mq_set.set_active_all_threads( 3239 false, /* new active value */ 3240 satb_mq_set.is_active() /* expected_active */); 3241 } 3242 3243 static void print_ms_time_info(const char* prefix, const char* name, 3244 NumberSeq& ns) { 3245 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3246 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3247 if (ns.num() > 0) { 3248 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3249 prefix, ns.sd(), ns.maximum()); 3250 } 3251 } 3252 3253 void ConcurrentMark::print_summary_info() { 3254 gclog_or_tty->print_cr(" Concurrent marking:"); 3255 print_ms_time_info(" ", "init marks", _init_times); 3256 print_ms_time_info(" ", "remarks", _remark_times); 3257 { 3258 print_ms_time_info(" ", "final marks", _remark_mark_times); 3259 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3260 3261 } 3262 print_ms_time_info(" ", "cleanups", _cleanup_times); 3263 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3264 _total_counting_time, 3265 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3266 (double)_cleanup_times.num() 3267 : 0.0)); 3268 if (G1ScrubRemSets) { 3269 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3270 _total_rs_scrub_time, 3271 (_cleanup_times.num() > 0 ? 
_total_rs_scrub_time * 1000.0 / 3272 (double)_cleanup_times.num() 3273 : 0.0)); 3274 } 3275 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3276 (_init_times.sum() + _remark_times.sum() + 3277 _cleanup_times.sum())/1000.0); 3278 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3279 "(%8.2f s marking).", 3280 cmThread()->vtime_accum(), 3281 cmThread()->vtime_mark_accum()); 3282 } 3283 3284 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3285 if (use_parallel_marking_threads()) { 3286 _parallel_workers->print_worker_threads_on(st); 3287 } 3288 } 3289 3290 // We take a break if someone is trying to stop the world. 3291 bool ConcurrentMark::do_yield_check(uint worker_id) { 3292 if (should_yield()) { 3293 if (worker_id == 0) { 3294 _g1h->g1_policy()->record_concurrent_pause(); 3295 } 3296 cmThread()->yield(); 3297 return true; 3298 } else { 3299 return false; 3300 } 3301 } 3302 3303 bool ConcurrentMark::should_yield() { 3304 return cmThread()->should_yield(); 3305 } 3306 3307 bool ConcurrentMark::containing_card_is_marked(void* p) { 3308 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3309 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3310 } 3311 3312 bool ConcurrentMark::containing_cards_are_marked(void* start, 3313 void* last) { 3314 return containing_card_is_marked(start) && 3315 containing_card_is_marked(last); 3316 } 3317 3318 #ifndef PRODUCT 3319 // for debugging purposes 3320 void ConcurrentMark::print_finger() { 3321 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3322 _heap_start, _heap_end, _finger); 3323 for (uint i = 0; i < _max_worker_id; ++i) { 3324 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger()); 3325 } 3326 gclog_or_tty->print_cr(""); 3327 } 3328 #endif 3329 3330 void CMTask::scan_object(oop obj) { 3331 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3332 3333 if (_cm->verbose_high()) { 3334 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3335 _worker_id, (void*) obj); 3336 } 3337 3338 size_t obj_size = obj->size(); 3339 _words_scanned += obj_size; 3340 3341 obj->oop_iterate(_cm_oop_closure); 3342 statsOnly( ++_objs_scanned ); 3343 check_limits(); 3344 } 3345 3346 // Closure for iteration over bitmaps 3347 class CMBitMapClosure : public BitMapClosure { 3348 private: 3349 // the bitmap that is being iterated over 3350 CMBitMap* _nextMarkBitMap; 3351 ConcurrentMark* _cm; 3352 CMTask* _task; 3353 3354 public: 3355 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3356 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3357 3358 bool do_bit(size_t offset) { 3359 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3360 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3361 assert( addr < _cm->finger(), "invariant"); 3362 3363 statsOnly( _task->increase_objs_found_on_bitmap() ); 3364 assert(addr >= _task->finger(), "invariant"); 3365 3366 // We move that task's local finger along. 3367 _task->move_finger_to(addr); 3368 3369 _task->scan_object(oop(addr)); 3370 // we only partially drain the local queue and global stack 3371 _task->drain_local_queue(true); 3372 _task->drain_global_stack(true); 3373 3374 // if the has_aborted flag has been raised, we need to bail out of 3375 // the iteration 3376 return !_task->has_aborted(); 3377 } 3378 }; 3379 3380 // Closure for iterating over objects, currently only used for 3381 // processing SATB buffers. 
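// Each object in a claimed SATB buffer is simply handed to
// CMTask::deal_with_reference() below.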
3382 class CMObjectClosure : public ObjectClosure { 3383 private: 3384 CMTask* _task; 3385 3386 public: 3387 void do_object(oop obj) { 3388 _task->deal_with_reference(obj); 3389 } 3390 3391 CMObjectClosure(CMTask* task) : _task(task) { } 3392 }; 3393 3394 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3395 ConcurrentMark* cm, 3396 CMTask* task) 3397 : _g1h(g1h), _cm(cm), _task(task) { 3398 assert(_ref_processor == NULL, "should be initialized to NULL"); 3399 3400 if (G1UseConcMarkReferenceProcessing) { 3401 _ref_processor = g1h->ref_processor_cm(); 3402 assert(_ref_processor != NULL, "should not be NULL"); 3403 } 3404 } 3405 3406 void CMTask::setup_for_region(HeapRegion* hr) { 3407 // Separated the asserts so that we know which one fires. 3408 assert(hr != NULL, 3409 "claim_region() should have filtered out continues humongous regions"); 3410 assert(!hr->continuesHumongous(), 3411 "claim_region() should have filtered out continues humongous regions"); 3412 3413 if (_cm->verbose_low()) { 3414 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT, 3415 _worker_id, hr); 3416 } 3417 3418 _curr_region = hr; 3419 _finger = hr->bottom(); 3420 update_region_limit(); 3421 } 3422 3423 void CMTask::update_region_limit() { 3424 HeapRegion* hr = _curr_region; 3425 HeapWord* bottom = hr->bottom(); 3426 HeapWord* limit = hr->next_top_at_mark_start(); 3427 3428 if (limit == bottom) { 3429 if (_cm->verbose_low()) { 3430 gclog_or_tty->print_cr("[%u] found an empty region " 3431 "["PTR_FORMAT", "PTR_FORMAT")", 3432 _worker_id, bottom, limit); 3433 } 3434 // The region was collected underneath our feet. 3435 // We set the finger to bottom to ensure that the bitmap 3436 // iteration that will follow this will not do anything. 3437 // (this is not a condition that holds when we set the region up, 3438 // as the region is not supposed to be empty in the first place) 3439 _finger = bottom; 3440 } else if (limit >= _region_limit) { 3441 assert(limit >= _finger, "peace of mind"); 3442 } else { 3443 assert(limit < _region_limit, "only way to get here"); 3444 // This can happen under some pretty unusual circumstances. An 3445 // evacuation pause empties the region underneath our feet (NTAMS 3446 // at bottom). We then do some allocation in the region (NTAMS 3447 // stays at bottom), followed by the region being used as a GC 3448 // alloc region (NTAMS will move to top() and the objects 3449 // originally below it will be grayed). All objects now marked in 3450 // the region are explicitly grayed, if below the global finger, 3451 // and we do not need in fact to scan anything else. So, we simply 3452 // set _finger to be limit to ensure that the bitmap iteration 3453 // doesn't do anything. 3454 _finger = limit; 3455 } 3456 3457 _region_limit = limit; 3458 } 3459 3460 void CMTask::giveup_current_region() { 3461 assert(_curr_region != NULL, "invariant"); 3462 if (_cm->verbose_low()) { 3463 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3464 _worker_id, _curr_region); 3465 } 3466 clear_region_fields(); 3467 } 3468 3469 void CMTask::clear_region_fields() { 3470 // Values for these three fields that indicate that we're not 3471 // holding on to a region. 
3472 _curr_region = NULL; 3473 _finger = NULL; 3474 _region_limit = NULL; 3475 } 3476 3477 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3478 if (cm_oop_closure == NULL) { 3479 assert(_cm_oop_closure != NULL, "invariant"); 3480 } else { 3481 assert(_cm_oop_closure == NULL, "invariant"); 3482 } 3483 _cm_oop_closure = cm_oop_closure; 3484 } 3485 3486 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3487 guarantee(nextMarkBitMap != NULL, "invariant"); 3488 3489 if (_cm->verbose_low()) { 3490 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3491 } 3492 3493 _nextMarkBitMap = nextMarkBitMap; 3494 clear_region_fields(); 3495 3496 _calls = 0; 3497 _elapsed_time_ms = 0.0; 3498 _termination_time_ms = 0.0; 3499 _termination_start_time_ms = 0.0; 3500 3501 #if _MARKING_STATS_ 3502 _local_pushes = 0; 3503 _local_pops = 0; 3504 _local_max_size = 0; 3505 _objs_scanned = 0; 3506 _global_pushes = 0; 3507 _global_pops = 0; 3508 _global_max_size = 0; 3509 _global_transfers_to = 0; 3510 _global_transfers_from = 0; 3511 _regions_claimed = 0; 3512 _objs_found_on_bitmap = 0; 3513 _satb_buffers_processed = 0; 3514 _steal_attempts = 0; 3515 _steals = 0; 3516 _aborted = 0; 3517 _aborted_overflow = 0; 3518 _aborted_cm_aborted = 0; 3519 _aborted_yield = 0; 3520 _aborted_timed_out = 0; 3521 _aborted_satb = 0; 3522 _aborted_termination = 0; 3523 #endif // _MARKING_STATS_ 3524 } 3525 3526 bool CMTask::should_exit_termination() { 3527 regular_clock_call(); 3528 // This is called when we are in the termination protocol. We should 3529 // quit if, for some reason, this task wants to abort or the global 3530 // stack is not empty (this means that we can get work from it). 3531 return !_cm->mark_stack_empty() || has_aborted(); 3532 } 3533 3534 void CMTask::reached_limit() { 3535 assert(_words_scanned >= _words_scanned_limit || 3536 _refs_reached >= _refs_reached_limit , 3537 "shouldn't have been called otherwise"); 3538 regular_clock_call(); 3539 } 3540 3541 void CMTask::regular_clock_call() { 3542 if (has_aborted()) return; 3543 3544 // First, we need to recalculate the words scanned and refs reached 3545 // limits for the next clock call. 3546 recalculate_limits(); 3547 3548 // During the regular clock call we do the following 3549 3550 // (1) If an overflow has been flagged, then we abort. 3551 if (_cm->has_overflown()) { 3552 set_has_aborted(); 3553 return; 3554 } 3555 3556 // If we are not concurrent (i.e. we're doing remark) we don't need 3557 // to check anything else. The other steps are only needed during 3558 // the concurrent marking phase. 3559 if (!concurrent()) return; 3560 3561 // (2) If marking has been aborted for Full GC, then we also abort. 3562 if (_cm->has_aborted()) { 3563 set_has_aborted(); 3564 statsOnly( ++_aborted_cm_aborted ); 3565 return; 3566 } 3567 3568 double curr_time_ms = os::elapsedVTime() * 1000.0; 3569 3570 // (3) If marking stats are enabled, then we update the step history. 
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _worker_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
                             _worker_id);
    }
    // We do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so.
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanning cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
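  // Concretely, the limits below are pulled back by three quarters of a
  // period, so after one of these expensive operations the regular clock
  // gets called after at most a quarter of the usual amount of scanning
  // or reference-visiting work.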
3642 3643 if (_cm->verbose_medium()) { 3644 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id); 3645 } 3646 3647 _words_scanned_limit = _real_words_scanned_limit - 3648 3 * words_scanned_period / 4; 3649 _refs_reached_limit = _real_refs_reached_limit - 3650 3 * refs_reached_period / 4; 3651 } 3652 3653 void CMTask::move_entries_to_global_stack() { 3654 // local array where we'll store the entries that will be popped 3655 // from the local queue 3656 oop buffer[global_stack_transfer_size]; 3657 3658 int n = 0; 3659 oop obj; 3660 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3661 buffer[n] = obj; 3662 ++n; 3663 } 3664 3665 if (n > 0) { 3666 // we popped at least one entry from the local queue 3667 3668 statsOnly( ++_global_transfers_to; _local_pops += n ); 3669 3670 if (!_cm->mark_stack_push(buffer, n)) { 3671 if (_cm->verbose_low()) { 3672 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow", 3673 _worker_id); 3674 } 3675 set_has_aborted(); 3676 } else { 3677 // the transfer was successful 3678 3679 if (_cm->verbose_medium()) { 3680 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack", 3681 _worker_id, n); 3682 } 3683 statsOnly( int tmp_size = _cm->mark_stack_size(); 3684 if (tmp_size > _global_max_size) { 3685 _global_max_size = tmp_size; 3686 } 3687 _global_pushes += n ); 3688 } 3689 } 3690 3691 // this operation was quite expensive, so decrease the limits 3692 decrease_limits(); 3693 } 3694 3695 void CMTask::get_entries_from_global_stack() { 3696 // local array where we'll store the entries that will be popped 3697 // from the global stack. 3698 oop buffer[global_stack_transfer_size]; 3699 int n; 3700 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3701 assert(n <= global_stack_transfer_size, 3702 "we should not pop more than the given limit"); 3703 if (n > 0) { 3704 // yes, we did actually pop at least one entry 3705 3706 statsOnly( ++_global_transfers_from; _global_pops += n ); 3707 if (_cm->verbose_medium()) { 3708 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack", 3709 _worker_id, n); 3710 } 3711 for (int i = 0; i < n; ++i) { 3712 bool success = _task_queue->push(buffer[i]); 3713 // We only call this when the local queue is empty or under a 3714 // given target limit. So, we do not expect this push to fail. 3715 assert(success, "invariant"); 3716 } 3717 3718 statsOnly( int tmp_size = _task_queue->size(); 3719 if (tmp_size > _local_max_size) { 3720 _local_max_size = tmp_size; 3721 } 3722 _local_pushes += n ); 3723 } 3724 3725 // this operation was quite expensive, so decrease the limits 3726 decrease_limits(); 3727 } 3728 3729 void CMTask::drain_local_queue(bool partially) { 3730 if (has_aborted()) return; 3731 3732 // Decide what the target size is, depending whether we're going to 3733 // drain it partially (so that other tasks can steal if they run out 3734 // of things to do) or totally (at the very end). 
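  // A partial drain leaves at most roughly a third of the queue's
  // capacity (capped by GCDrainStackTargetSize) so that other tasks still
  // have entries to steal; a total drain empties the queue completely.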
3735 size_t target_size; 3736 if (partially) { 3737 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3738 } else { 3739 target_size = 0; 3740 } 3741 3742 if (_task_queue->size() > target_size) { 3743 if (_cm->verbose_high()) { 3744 gclog_or_tty->print_cr("[%u] draining local queue, target size = %d", 3745 _worker_id, target_size); 3746 } 3747 3748 oop obj; 3749 bool ret = _task_queue->pop_local(obj); 3750 while (ret) { 3751 statsOnly( ++_local_pops ); 3752 3753 if (_cm->verbose_high()) { 3754 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id, 3755 (void*) obj); 3756 } 3757 3758 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3759 assert(!_g1h->is_on_master_free_list( 3760 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3761 3762 scan_object(obj); 3763 3764 if (_task_queue->size() <= target_size || has_aborted()) { 3765 ret = false; 3766 } else { 3767 ret = _task_queue->pop_local(obj); 3768 } 3769 } 3770 3771 if (_cm->verbose_high()) { 3772 gclog_or_tty->print_cr("[%u] drained local queue, size = %d", 3773 _worker_id, _task_queue->size()); 3774 } 3775 } 3776 } 3777 3778 void CMTask::drain_global_stack(bool partially) { 3779 if (has_aborted()) return; 3780 3781 // We have a policy to drain the local queue before we attempt to 3782 // drain the global stack. 3783 assert(partially || _task_queue->size() == 0, "invariant"); 3784 3785 // Decide what the target size is, depending whether we're going to 3786 // drain it partially (so that other tasks can steal if they run out 3787 // of things to do) or totally (at the very end). Notice that, 3788 // because we move entries from the global stack in chunks or 3789 // because another task might be doing the same, we might in fact 3790 // drop below the target. But, this is not a problem. 3791 size_t target_size; 3792 if (partially) { 3793 target_size = _cm->partial_mark_stack_size_target(); 3794 } else { 3795 target_size = 0; 3796 } 3797 3798 if (_cm->mark_stack_size() > target_size) { 3799 if (_cm->verbose_low()) { 3800 gclog_or_tty->print_cr("[%u] draining global_stack, target size %d", 3801 _worker_id, target_size); 3802 } 3803 3804 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3805 get_entries_from_global_stack(); 3806 drain_local_queue(partially); 3807 } 3808 3809 if (_cm->verbose_low()) { 3810 gclog_or_tty->print_cr("[%u] drained global stack, size = %d", 3811 _worker_id, _cm->mark_stack_size()); 3812 } 3813 } 3814 } 3815 3816 // SATB Queue has several assumptions on whether to call the par or 3817 // non-par versions of the methods. this is why some of the code is 3818 // replicated. We should really get rid of the single-threaded version 3819 // of the code to simplify things. 3820 void CMTask::drain_satb_buffers() { 3821 if (has_aborted()) return; 3822 3823 // We set this so that the regular clock knows that we're in the 3824 // middle of draining buffers and doesn't set the abort flag when it 3825 // notices that SATB buffers are available for draining. It'd be 3826 // very counter productive if it did that. :-) 3827 _draining_satb_buffers = true; 3828 3829 CMObjectClosure oc(this); 3830 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3831 if (G1CollectedHeap::use_parallel_gc_threads()) { 3832 satb_mq_set.set_par_closure(_worker_id, &oc); 3833 } else { 3834 satb_mq_set.set_closure(&oc); 3835 } 3836 3837 // This keeps claiming and applying the closure to completed buffers 3838 // until we run out of buffers or we need to abort. 
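  // The parallel and serial branches differ only in which SATB queue set
  // entry points they use; both call regular_clock_call() after each
  // buffer so that abort conditions are noticed promptly.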
3839 if (G1CollectedHeap::use_parallel_gc_threads()) { 3840 while (!has_aborted() && 3841 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3842 if (_cm->verbose_medium()) { 3843 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3844 } 3845 statsOnly( ++_satb_buffers_processed ); 3846 regular_clock_call(); 3847 } 3848 } else { 3849 while (!has_aborted() && 3850 satb_mq_set.apply_closure_to_completed_buffer()) { 3851 if (_cm->verbose_medium()) { 3852 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3853 } 3854 statsOnly( ++_satb_buffers_processed ); 3855 regular_clock_call(); 3856 } 3857 } 3858 3859 if (!concurrent() && !has_aborted()) { 3860 // We should only do this during remark. 3861 if (G1CollectedHeap::use_parallel_gc_threads()) { 3862 satb_mq_set.par_iterate_closure_all_threads(_worker_id); 3863 } else { 3864 satb_mq_set.iterate_closure_all_threads(); 3865 } 3866 } 3867 3868 _draining_satb_buffers = false; 3869 3870 assert(has_aborted() || 3871 concurrent() || 3872 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3873 3874 if (G1CollectedHeap::use_parallel_gc_threads()) { 3875 satb_mq_set.set_par_closure(_worker_id, NULL); 3876 } else { 3877 satb_mq_set.set_closure(NULL); 3878 } 3879 3880 // again, this was a potentially expensive operation, decrease the 3881 // limits to get the regular clock call early 3882 decrease_limits(); 3883 } 3884 3885 void CMTask::print_stats() { 3886 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3887 _worker_id, _calls); 3888 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3889 _elapsed_time_ms, _termination_time_ms); 3890 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3891 _step_times_ms.num(), _step_times_ms.avg(), 3892 _step_times_ms.sd()); 3893 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3894 _step_times_ms.maximum(), _step_times_ms.sum()); 3895 3896 #if _MARKING_STATS_ 3897 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3898 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3899 _all_clock_intervals_ms.sd()); 3900 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3901 _all_clock_intervals_ms.maximum(), 3902 _all_clock_intervals_ms.sum()); 3903 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3904 _clock_due_to_scanning, _clock_due_to_marking); 3905 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3906 _objs_scanned, _objs_found_on_bitmap); 3907 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3908 _local_pushes, _local_pops, _local_max_size); 3909 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3910 _global_pushes, _global_pops, _global_max_size); 3911 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3912 _global_transfers_to,_global_transfers_from); 3913 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3914 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3915 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3916 _steal_attempts, _steals); 3917 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3918 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3919 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3920 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3921 _aborted_timed_out, 
                         _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in
    parallel with other invocations of do_marking_step() on different
    tasks (but only one invocation per task, obviously), either
    concurrently with the mutator threads or during remark, hence it
    eliminates the need for two versions of the code. When called
    during remark, it picks up from where the task left off during the
    concurrent marking phase. Interestingly, tasks are also claimable
    during evacuation pauses, since do_marking_step() ensures that it
    aborts before it needs to yield.

    The data structures that it uses to do marking work are the
    following:

    (1) Marking Bitmap. If there are gray objects that appear only
    on the bitmap (this happens either when dealing with an overflow
    or when the initial marking phase has simply marked the roots
    and didn't push them on the stack), then tasks claim heap
    regions whose bitmap they then scan to find gray objects. A
    global finger indicates where the end of the last claimed region
    is. A local finger indicates how far into the region a task has
    scanned. The two fingers are used to determine how to gray an
    object (i.e. whether simply marking it is OK, as it will be
    visited by a task in the future, or whether it also needs to be
    pushed on a stack).

    (2) Local Queue. The task's local queue, which the task can
    access reasonably efficiently. Other tasks can steal from it
    when they run out of work. Throughout the marking phase, a
    task attempts to keep its local queue short but not totally
    empty, so that entries are available for stealing by other
    tasks. Only when there is no more work will a task totally
    drain its local queue.

    (3) Global Mark Stack. This handles local queue overflow. During
    marking, only batches of entries are moved between it and the
    local queues, as access to it requires a mutex and finer-grained
    interaction with it might cause contention. If it overflows, then
    the marking phase should restart and iterate over the bitmap to
    identify gray objects. Throughout the marking phase, tasks
    attempt to keep the global mark stack short but not totally
    empty, so that entries are available for popping by other
    tasks. Only when there is no more work will tasks totally drain
    the global mark stack.

    (4) SATB Buffer Queue. This is where completed SATB buffers are
    made available. Buffers are regularly removed from this queue
    and scanned for roots, so that the queue doesn't get too
    long. During remark, all completed buffers are processed, as
    well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (on the global stack) has been
    triggered. Before the task aborts, it will actually sync up with
    the other tasks to ensure that all the marking data structures
    (local queues, stacks, fingers etc.) are re-initialized so that
    when do_marking_step() completes, the marking phase can
    immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and do the actual
    yield right at the end of do_marking_step(). This saves us a lot
    of hassle: by yielding we might allow a Full GC, in which case
    objects would be compacted underneath our feet, the heap might
    shrink, etc. We avoid having to check for any of this mid-step by
    simply aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, at the very least, regularly) until it
    completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (at
    sub-millisecond intervals) throughout marking. It is this clock
    method that checks all the abort conditions mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger the clock method: it is called when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reaches a given limit. Additional
    invocations of the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid querying the virtual time too frequently, as that is
    quite expensive. Once it was in place, it was natural to
    piggy-back all the other conditions on it too rather than checking
    them constantly throughout the code.

    If do_stealing is true then do_marking_step will attempt to steal
    work from the other CMTasks. It only makes sense to enable
    stealing when the method is called by multiple threads.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread), in which case
    do_marking_step should skip any synchronization in the termination
    and overflow code. Examples include the serial remark code and the
    serial reference processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.
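
    As an illustration only (a simplified sketch, not the exact code of
    callers such as the concurrent marking or remark tasks), a parallel
    worker typically drives its task roughly as follows; the 10.0ms time
    target is just an example value:

      CMTask* the_task = _cm->task(worker_id);
      do {
        the_task->do_marking_step(10.0 /* time_target_ms (example) */,
                                  true  /* do_stealing    */,
                                  true  /* do_termination */,
                                  false /* is_serial      */);
        _cm->do_yield_check(worker_id);
      } while (!_cm->has_aborted() && the_task->has_aborted());

    i.e. the step is retried until either marking as a whole has been
    aborted or the task manages to complete a step without aborting.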

*****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _worker_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
4127 update_region_limit(); 4128 // We will start from _finger not from the start of the region, 4129 // as we might be restarting this task after aborting half-way 4130 // through scanning this region. In this case, _finger points to 4131 // the address where we last found a marked object. If this is a 4132 // fresh region, _finger points to start(). 4133 MemRegion mr = MemRegion(_finger, _region_limit); 4134 4135 if (_cm->verbose_low()) { 4136 gclog_or_tty->print_cr("[%u] we're scanning part " 4137 "["PTR_FORMAT", "PTR_FORMAT") " 4138 "of region "HR_FORMAT, 4139 _worker_id, _finger, _region_limit, 4140 HR_FORMAT_PARAMS(_curr_region)); 4141 } 4142 4143 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4144 "humongous regions should go around loop once only"); 4145 4146 // Some special cases: 4147 // If the memory region is empty, we can just give up the region. 4148 // If the current region is humongous then we only need to check 4149 // the bitmap for the bit associated with the start of the object, 4150 // scan the object if it's live, and give up the region. 4151 // Otherwise, let's iterate over the bitmap of the part of the region 4152 // that is left. 4153 // If the iteration is successful, give up the region. 4154 if (mr.is_empty()) { 4155 giveup_current_region(); 4156 regular_clock_call(); 4157 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4158 if (_nextMarkBitMap->isMarked(mr.start())) { 4159 // The object is marked - apply the closure 4160 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4161 bitmap_closure.do_bit(offset); 4162 } 4163 // Even if this task aborted while scanning the humongous object 4164 // we can (and should) give up the current region. 4165 giveup_current_region(); 4166 regular_clock_call(); 4167 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4168 giveup_current_region(); 4169 regular_clock_call(); 4170 } else { 4171 assert(has_aborted(), "currently the only way to do so"); 4172 // The only way to abort the bitmap iteration is to return 4173 // false from the do_bit() method. However, inside the 4174 // do_bit() method we move the _finger to point to the 4175 // object currently being looked at. So, if we bail out, we 4176 // have definitely set _finger to something non-null. 4177 assert(_finger != NULL, "invariant"); 4178 4179 // Region iteration was actually aborted. So now _finger 4180 // points to the address of the object we last scanned. If we 4181 // leave it there, when we restart this task, we will rescan 4182 // the object. It is easy to avoid this. We move the finger by 4183 // enough to point to the next possible object header (the 4184 // bitmap knows by how much we need to move it as it knows its 4185 // granularity). 4186 assert(_finger < _region_limit, "invariant"); 4187 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4188 // Check if bitmap iteration was aborted while scanning the last object 4189 if (new_finger >= _region_limit) { 4190 giveup_current_region(); 4191 } else { 4192 move_finger_to(new_finger); 4193 } 4194 } 4195 } 4196 // At this point we have either completed iterating over the 4197 // region we were holding on to, or we have aborted. 4198 4199 // We then partially drain the local queue and the global stack. 4200 // (Do we really need this?) 
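    // (At worst these are cheap size checks when the queue and stack are
    // already at or below their targets; at best they keep both close to
    // their targets before we go and claim another region.)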
4201 drain_local_queue(true); 4202 drain_global_stack(true); 4203 4204 // Read the note on the claim_region() method on why it might 4205 // return NULL with potentially more regions available for 4206 // claiming and why we have to check out_of_regions() to determine 4207 // whether we're done or not. 4208 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4209 // We are going to try to claim a new region. We should have 4210 // given up on the previous one. 4211 // Separated the asserts so that we know which one fires. 4212 assert(_curr_region == NULL, "invariant"); 4213 assert(_finger == NULL, "invariant"); 4214 assert(_region_limit == NULL, "invariant"); 4215 if (_cm->verbose_low()) { 4216 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4217 } 4218 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4219 if (claimed_region != NULL) { 4220 // Yes, we managed to claim one 4221 statsOnly( ++_regions_claimed ); 4222 4223 if (_cm->verbose_low()) { 4224 gclog_or_tty->print_cr("[%u] we successfully claimed " 4225 "region "PTR_FORMAT, 4226 _worker_id, claimed_region); 4227 } 4228 4229 setup_for_region(claimed_region); 4230 assert(_curr_region == claimed_region, "invariant"); 4231 } 4232 // It is important to call the regular clock here. It might take 4233 // a while to claim a region if, for example, we hit a large 4234 // block of empty regions. So we need to call the regular clock 4235 // method once round the loop to make sure it's called 4236 // frequently enough. 4237 regular_clock_call(); 4238 } 4239 4240 if (!has_aborted() && _curr_region == NULL) { 4241 assert(_cm->out_of_regions(), 4242 "at this point we should be out of regions"); 4243 } 4244 } while ( _curr_region != NULL && !has_aborted()); 4245 4246 if (!has_aborted()) { 4247 // We cannot check whether the global stack is empty, since other 4248 // tasks might be pushing objects to it concurrently. 4249 assert(_cm->out_of_regions(), 4250 "at this point we should be out of regions"); 4251 4252 if (_cm->verbose_low()) { 4253 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4254 } 4255 4256 // Try to reduce the number of available SATB buffers so that 4257 // remark has less work to do. 4258 drain_satb_buffers(); 4259 } 4260 4261 // Since we've done everything else, we can now totally drain the 4262 // local queue and global stack. 4263 drain_local_queue(false); 4264 drain_global_stack(false); 4265 4266 // Attempt at work stealing from other task's queues. 4267 if (do_stealing && !has_aborted()) { 4268 // We have not aborted. This means that we have finished all that 4269 // we could. Let's try to do some stealing... 4270 4271 // We cannot check whether the global stack is empty, since other 4272 // tasks might be pushing objects to it concurrently. 
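    // What we can check is that we have run out of regions to claim and
    // that our own local queue is empty, which is what the assert below
    // verifies.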
4273 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4274 "only way to reach here"); 4275 4276 if (_cm->verbose_low()) { 4277 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4278 } 4279 4280 while (!has_aborted()) { 4281 oop obj; 4282 statsOnly( ++_steal_attempts ); 4283 4284 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4285 if (_cm->verbose_medium()) { 4286 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4287 _worker_id, (void*) obj); 4288 } 4289 4290 statsOnly( ++_steals ); 4291 4292 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4293 "any stolen object should be marked"); 4294 scan_object(obj); 4295 4296 // And since we're towards the end, let's totally drain the 4297 // local queue and global stack. 4298 drain_local_queue(false); 4299 drain_global_stack(false); 4300 } else { 4301 break; 4302 } 4303 } 4304 } 4305 4306 // If we are about to wrap up and go into termination, check if we 4307 // should raise the overflow flag. 4308 if (do_termination && !has_aborted()) { 4309 if (_cm->force_overflow()->should_force()) { 4310 _cm->set_has_overflown(); 4311 regular_clock_call(); 4312 } 4313 } 4314 4315 // We still haven't aborted. Now, let's try to get into the 4316 // termination protocol. 4317 if (do_termination && !has_aborted()) { 4318 // We cannot check whether the global stack is empty, since other 4319 // tasks might be concurrently pushing objects on it. 4320 // Separated the asserts so that we know which one fires. 4321 assert(_cm->out_of_regions(), "only way to reach here"); 4322 assert(_task_queue->size() == 0, "only way to reach here"); 4323 4324 if (_cm->verbose_low()) { 4325 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4326 } 4327 4328 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4329 4330 // The CMTask class also extends the TerminatorTerminator class, 4331 // hence its should_exit_termination() method will also decide 4332 // whether to exit the termination protocol or not. 4333 bool finished = (is_serial || 4334 _cm->terminator()->offer_termination(this)); 4335 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4336 _termination_time_ms += 4337 termination_end_time_ms - _termination_start_time_ms; 4338 4339 if (finished) { 4340 // We're all done. 4341 4342 if (_worker_id == 0) { 4343 // let's allow task 0 to do this 4344 if (concurrent()) { 4345 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4346 // we need to set this to false before the next 4347 // safepoint. This way we ensure that the marking phase 4348 // doesn't observe any more heap expansions. 4349 _cm->clear_concurrent_marking_in_progress(); 4350 } 4351 } 4352 4353 // We can now guarantee that the global stack is empty, since 4354 // all other tasks have finished. We separated the guarantees so 4355 // that, if a condition is false, we can immediately find out 4356 // which one. 4357 guarantee(_cm->out_of_regions(), "only way to reach here"); 4358 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4359 guarantee(_task_queue->size() == 0, "only way to reach here"); 4360 guarantee(!_cm->has_overflown(), "only way to reach here"); 4361 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4362 4363 if (_cm->verbose_low()) { 4364 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4365 } 4366 } else { 4367 // Apparently there's more work to do. Let's abort this task. It 4368 // will restart it and we can hopefully find more things to do. 
4369 4370 if (_cm->verbose_low()) { 4371 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4372 _worker_id); 4373 } 4374 4375 set_has_aborted(); 4376 statsOnly( ++_aborted_termination ); 4377 } 4378 } 4379 4380 // Mainly for debugging purposes to make sure that a pointer to the 4381 // closure which was statically allocated in this frame doesn't 4382 // escape it by accident. 4383 set_cm_oop_closure(NULL); 4384 double end_time_ms = os::elapsedVTime() * 1000.0; 4385 double elapsed_time_ms = end_time_ms - _start_time_ms; 4386 // Update the step history. 4387 _step_times_ms.add(elapsed_time_ms); 4388 4389 if (has_aborted()) { 4390 // The task was aborted for some reason. 4391 4392 statsOnly( ++_aborted ); 4393 4394 if (_has_timed_out) { 4395 double diff_ms = elapsed_time_ms - _time_target_ms; 4396 // Keep statistics of how well we did with respect to hitting 4397 // our target only if we actually timed out (if we aborted for 4398 // other reasons, then the results might get skewed). 4399 _marking_step_diffs_ms.add(diff_ms); 4400 } 4401 4402 if (_cm->has_overflown()) { 4403 // This is the interesting one. We aborted because a global 4404 // overflow was raised. This means we have to restart the 4405 // marking phase and start iterating over regions. However, in 4406 // order to do this we have to make sure that all tasks stop 4407 // what they are doing and re-initialise in a safe manner. We 4408 // will achieve this with the use of two barrier sync points. 4409 4410 if (_cm->verbose_low()) { 4411 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4412 } 4413 4414 if (!is_serial) { 4415 // We only need to enter the sync barrier if being called 4416 // from a parallel context 4417 _cm->enter_first_sync_barrier(_worker_id); 4418 4419 // When we exit this sync barrier we know that all tasks have 4420 // stopped doing marking work. So, it's now safe to 4421 // re-initialise our data structures. At the end of this method, 4422 // task 0 will clear the global data structures. 4423 } 4424 4425 statsOnly( ++_aborted_overflow ); 4426 4427 // We clear the local state of this task... 4428 clear_region_fields(); 4429 4430 if (!is_serial) { 4431 // ...and enter the second barrier. 4432 _cm->enter_second_sync_barrier(_worker_id); 4433 } 4434 // At this point, if we're during the concurrent phase of 4435 // marking, everything has been re-initialized and we're 4436 // ready to restart. 
4437 } 4438 4439 if (_cm->verbose_low()) { 4440 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4441 "elapsed = %1.2lfms <<<<<<<<<<", 4442 _worker_id, _time_target_ms, elapsed_time_ms); 4443 if (_cm->has_aborted()) { 4444 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4445 _worker_id); 4446 } 4447 } 4448 } else { 4449 if (_cm->verbose_low()) { 4450 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4451 "elapsed = %1.2lfms <<<<<<<<<<", 4452 _worker_id, _time_target_ms, elapsed_time_ms); 4453 } 4454 } 4455 4456 _claimed = false; 4457 } 4458 4459 CMTask::CMTask(uint worker_id, 4460 ConcurrentMark* cm, 4461 size_t* marked_bytes, 4462 BitMap* card_bm, 4463 CMTaskQueue* task_queue, 4464 CMTaskQueueSet* task_queues) 4465 : _g1h(G1CollectedHeap::heap()), 4466 _worker_id(worker_id), _cm(cm), 4467 _claimed(false), 4468 _nextMarkBitMap(NULL), _hash_seed(17), 4469 _task_queue(task_queue), 4470 _task_queues(task_queues), 4471 _cm_oop_closure(NULL), 4472 _marked_bytes_array(marked_bytes), 4473 _card_bm(card_bm) { 4474 guarantee(task_queue != NULL, "invariant"); 4475 guarantee(task_queues != NULL, "invariant"); 4476 4477 statsOnly( _clock_due_to_scanning = 0; 4478 _clock_due_to_marking = 0 ); 4479 4480 _marking_step_diffs_ms.add(0.5); 4481 } 4482 4483 // These are formatting macros that are used below to ensure 4484 // consistent formatting. The *_H_* versions are used to format the 4485 // header for a particular value and they should be kept consistent 4486 // with the corresponding macro. Also note that most of the macros add 4487 // the necessary white space (as a prefix) which makes them a bit 4488 // easier to compose. 4489 4490 // All the output lines are prefixed with this string to be able to 4491 // identify them easily in a large log file. 4492 #define G1PPRL_LINE_PREFIX "###" 4493 4494 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4495 #ifdef _LP64 4496 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4497 #else // _LP64 4498 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4499 #endif // _LP64 4500 4501 // For per-region info 4502 #define G1PPRL_TYPE_FORMAT " %-4s" 4503 #define G1PPRL_TYPE_H_FORMAT " %4s" 4504 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4505 #define G1PPRL_BYTE_H_FORMAT " %9s" 4506 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4507 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4508 4509 // For summary info 4510 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4511 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4512 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4513 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4514 4515 G1PrintRegionLivenessInfoClosure:: 4516 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4517 : _out(out), 4518 _total_used_bytes(0), _total_capacity_bytes(0), 4519 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4520 _hum_used_bytes(0), _hum_capacity_bytes(0), 4521 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) { 4522 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4523 MemRegion g1_committed = g1h->g1_committed(); 4524 MemRegion g1_reserved = g1h->g1_reserved(); 4525 double now = os::elapsedTime(); 4526 4527 // Print the header of the output. 
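  // With the format macros above the header comes out roughly as follows
  // (illustrative only -- the phase name, addresses, region size and exact
  // column widths depend on the caller and the heap):
  //
  //   ### PHASE Post-Marking @ 12.345
  //   ### HEAP  committed: 0x...-0x...  reserved: 0x...-0x...  region-size: 1048576
  //   ###
  //   ###   type         address-range       used  prev-live  next-live     gc-eff
  //   ###                                  (bytes)    (bytes)    (bytes) (bytes/ms)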
4528 _out->cr(); 4529 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 4530 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 4531 G1PPRL_SUM_ADDR_FORMAT("committed") 4532 G1PPRL_SUM_ADDR_FORMAT("reserved") 4533 G1PPRL_SUM_BYTE_FORMAT("region-size"), 4534 g1_committed.start(), g1_committed.end(), 4535 g1_reserved.start(), g1_reserved.end(), 4536 HeapRegion::GrainBytes); 4537 _out->print_cr(G1PPRL_LINE_PREFIX); 4538 _out->print_cr(G1PPRL_LINE_PREFIX 4539 G1PPRL_TYPE_H_FORMAT 4540 G1PPRL_ADDR_BASE_H_FORMAT 4541 G1PPRL_BYTE_H_FORMAT 4542 G1PPRL_BYTE_H_FORMAT 4543 G1PPRL_BYTE_H_FORMAT 4544 G1PPRL_DOUBLE_H_FORMAT, 4545 "type", "address-range", 4546 "used", "prev-live", "next-live", "gc-eff"); 4547 _out->print_cr(G1PPRL_LINE_PREFIX 4548 G1PPRL_TYPE_H_FORMAT 4549 G1PPRL_ADDR_BASE_H_FORMAT 4550 G1PPRL_BYTE_H_FORMAT 4551 G1PPRL_BYTE_H_FORMAT 4552 G1PPRL_BYTE_H_FORMAT 4553 G1PPRL_DOUBLE_H_FORMAT, 4554 "", "", 4555 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)"); 4556 } 4557 4558 // It takes as a parameter a reference to one of the _hum_* fields, it 4559 // deduces the corresponding value for a region in a humongous region 4560 // series (either the region size, or what's left if the _hum_* field 4561 // is < the region size), and updates the _hum_* field accordingly. 4562 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 4563 size_t bytes = 0; 4564 // The > 0 check is to deal with the prev and next live bytes which 4565 // could be 0. 4566 if (*hum_bytes > 0) { 4567 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes); 4568 *hum_bytes -= bytes; 4569 } 4570 return bytes; 4571 } 4572 4573 // It deduces the values for a region in a humongous region series 4574 // from the _hum_* fields and updates those accordingly. It assumes 4575 // that that _hum_* fields have already been set up from the "starts 4576 // humongous" region and we visit the regions in address order. 4577 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4578 size_t* capacity_bytes, 4579 size_t* prev_live_bytes, 4580 size_t* next_live_bytes) { 4581 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4582 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4583 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4584 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4585 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4586 } 4587 4588 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4589 const char* type = ""; 4590 HeapWord* bottom = r->bottom(); 4591 HeapWord* end = r->end(); 4592 size_t capacity_bytes = r->capacity(); 4593 size_t used_bytes = r->used(); 4594 size_t prev_live_bytes = r->live_bytes(); 4595 size_t next_live_bytes = r->next_live_bytes(); 4596 double gc_eff = r->gc_efficiency(); 4597 if (r->used() == 0) { 4598 type = "FREE"; 4599 } else if (r->is_survivor()) { 4600 type = "SURV"; 4601 } else if (r->is_young()) { 4602 type = "EDEN"; 4603 } else if (r->startsHumongous()) { 4604 type = "HUMS"; 4605 4606 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4607 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4608 "they should have been zeroed after the last time we used them"); 4609 // Set up the _hum_* fields. 
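    // These hold the totals for the whole humongous series; get_hum_bytes()
    // hands them back out one region's worth at a time as this region and
    // its continues-humongous followers are visited.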
4610 _hum_capacity_bytes = capacity_bytes; 4611 _hum_used_bytes = used_bytes; 4612 _hum_prev_live_bytes = prev_live_bytes; 4613 _hum_next_live_bytes = next_live_bytes; 4614 get_hum_bytes(&used_bytes, &capacity_bytes, 4615 &prev_live_bytes, &next_live_bytes); 4616 end = bottom + HeapRegion::GrainWords; 4617 } else if (r->continuesHumongous()) { 4618 type = "HUMC"; 4619 get_hum_bytes(&used_bytes, &capacity_bytes, 4620 &prev_live_bytes, &next_live_bytes); 4621 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4622 } else { 4623 type = "OLD"; 4624 } 4625 4626 _total_used_bytes += used_bytes; 4627 _total_capacity_bytes += capacity_bytes; 4628 _total_prev_live_bytes += prev_live_bytes; 4629 _total_next_live_bytes += next_live_bytes; 4630 4631 // Print a line for this particular region. 4632 _out->print_cr(G1PPRL_LINE_PREFIX 4633 G1PPRL_TYPE_FORMAT 4634 G1PPRL_ADDR_BASE_FORMAT 4635 G1PPRL_BYTE_FORMAT 4636 G1PPRL_BYTE_FORMAT 4637 G1PPRL_BYTE_FORMAT 4638 G1PPRL_DOUBLE_FORMAT, 4639 type, bottom, end, 4640 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4641 4642 return false; 4643 } 4644 4645 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4646 // Print the footer of the output. 4647 _out->print_cr(G1PPRL_LINE_PREFIX); 4648 _out->print_cr(G1PPRL_LINE_PREFIX 4649 " SUMMARY" 4650 G1PPRL_SUM_MB_FORMAT("capacity") 4651 G1PPRL_SUM_MB_PERC_FORMAT("used") 4652 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4653 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4654 bytes_to_mb(_total_capacity_bytes), 4655 bytes_to_mb(_total_used_bytes), 4656 perc(_total_used_bytes, _total_capacity_bytes), 4657 bytes_to_mb(_total_prev_live_bytes), 4658 perc(_total_prev_live_bytes, _total_capacity_bytes), 4659 bytes_to_mb(_total_next_live_bytes), 4660 perc(_total_next_live_bytes, _total_capacity_bytes)); 4661 _out->cr(); 4662 }
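
// A G1PrintRegionLivenessInfoClosure is normally driven over every region in
// the heap, along the lines of the following sketch (illustrative only; the
// output stream and phase name are whatever the caller passes in):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the header, doHeapRegion() prints one line per
// region (folding each humongous series via the _hum_* fields), and the
// destructor prints the summary footer.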