/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
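  // The bitmap tracks one bit per 2^_shifter heap words, so a marked bit
  // can only correspond to an address aligned at that granularity; rounding
  // up keeps us from reporting a "marked" address below addr. Illustrative
  // arithmetic (not from the original sources): if _shifter were 3 with
  // 8-byte heap words, addr would be rounded up to the next 64-byte
  // boundary before the bitmap is searched.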
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
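  // Sizing note (illustrative, derived from the shift arithmetic above):
  // the backing store needs one bit per 2^_shifter heap words, i.e.
  // _bmWordSize >> (_shifter + LogBitsPerByte) bytes. For example, if
  // _shifter is 0 that is one bit per heap word, so on a 64-bit VM with
  // 8-byte heap words each marking bitmap costs roughly 1/64th of the
  // heap it covers.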
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflowed the marking stack while marking.
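  // Expansion policy (summary of the code below): the capacity is doubled,
  // capped at MarkStackSizeMax, and the old backing store is only released
  // once the larger reservation has succeeded. If the new reservation
  // fails we keep the current stack and carry on, which is benign.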
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          (size_t) (_capacity / K), (size_t) (new_capacity / K));
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
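  // Unlike par_push() / par_adjoin_arr() above, this path is fully
  // serialized by ParGCRareEvent_lock, so a plain store to _index
  // (rather than a CAS retry loop) is sufficient here.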
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
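  // Claiming protocol (summary of the code below): the first, lock-free
  // read of _next_survivor is just a fast-path check; the authoritative
  // claim happens under RootRegionScan_lock, where the value is re-read
  // in case another worker claimed the region in the meantime.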
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(LogMinObjAlignment),
  _markBitMap2(LogMinObjAlignment),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
            "than ParallelGCThreads (" UINT32_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads     = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor                 = 0.0;
    _marking_task_overhead        = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor          = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
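      // Illustrative arithmetic (not from the original sources): with
      // scale_parallel_threads() defined above as MAX2((n + 2) / 4, 1U),
      // 8 ParallelGCThreads yield (8 + 2) / 4 == 2 marking threads, while
      // small values are clamped to a minimum of 1.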
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
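    // Note (summary of the branch below): the explicit range checks are
    // only applied when MarkStackSize was set on the command line; an
    // ergonomically chosen default is assumed to be consistent with
    // MarkStackSizeMax.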
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
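  // Background (summary): root regions are currently the survivor regions
  // of the initial-mark pause. Their objects are treated as live marking
  // roots, so the scan has to complete before the next evacuation pause
  // is allowed to move them (see wait_until_scan_finished() above).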
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
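    // Rationale (assumption, not stated in the original sources): the
    // previous bitmap reflects the last completed marking, whereas the
    // next bitmap is only partially built at this point, since marking
    // is about to be restarted.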
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
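// Background on the two bitmaps used below (summary): the region bitmap
// has one bit per heap region and records which regions contain any live
// data; the card bitmap has one bit per card and records which cards span
// live objects. Both are filled in from the marking bitmap and compared
// against the counting data accumulated concurrently during marking.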
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
              "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit, in the region
// liveness bitmap, for each region containing live data.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
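      // A small worked example (assuming the usual 512-byte cards,
      // i.e. a card shift of 9): with ntams at byte offset 0 of the
      // covered space and top at byte offset 1280, start_idx is 0 and
      // end_idx is 2 (1280 >> 9 == 2). top is not card aligned, so
      // end_idx is bumped to 3 below and the half-open range
      // [start_idx, end_idx) covers cards 0, 1 and 2 - including the
      // partially filled last card.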
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     end_idx, _card_bm->size()));
      assert(start_idx < _card_bm->size(),
             err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     start_idx, _card_bm->size()));

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
              "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  uint _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  OldRegionSet* _old_proxy_set;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_proxy_set(old_proxy_set),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->continuesHumongous()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    _g1->reset_gc_time_stamps(hr);
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
    _g1->free_region_if_empty(hr,
                              &_freed_bytes,
                              _local_cleanup_list,
                              _old_proxy_set,
                              _humongous_proxy_set,
                              _hrrs_cleanup_task,
                              true /* par */);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) {
      _max_region_time = region_time;
    }
    return false;
  }

  size_t max_live_bytes() { return _max_live_bytes; }
  uint regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(uint worker_id) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
                                           &old_proxy_set,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
                                            _g1h->workers()->active_workers(),
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we cannot guarantee that we only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.
      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm) { }

  void work(uint worker_id) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  HRSPhaseSetter x(HRSPhaseCleanup);
  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  uint n_workers;

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    g1h->set_par_threads();
    n_workers = g1h->n_par_threads();
    assert(g1h->n_par_threads() == n_workers,
           "Should not have been reset");
    g1h->workers()->run_task(&g1_par_count_task);
    // Done with the parallel phase so reset to 0.
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    n_workers = 1;
    g1_par_count_task.work(0);
  }

  if (VerifyDuringGC) {
    // Verify that the counting data accumulated during marking matches
    // that calculated by walking the marking bitmap.
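    // The verification recomputes an "expected" region bitmap and card
    // bitmap from the next marking bitmap (see CalcLiveObjectsClosure
    // and VerifyLiveObjectDataHRClosure above) and then checks that
    // every bit set in the expected bitmaps is also set in the actual
    // ones.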
    // Bitmaps to hold expected values
    BitMap expected_region_bm(_region_bm.size(), false);
    BitMap expected_card_bm(_card_bm.size(), false);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
                                                 &_card_bm,
                                                 &expected_region_bm,
                                                 &expected_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_verify_task);
      // Done with the parallel phase so reset to 0.
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
             "sanity check");
    } else {
      g1_par_verify_task.work(0);
    }

    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  }

  size_t start_used_bytes = g1h->used();
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitmap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Scrub the remembered sets (if enabled) before the
  // record_concurrent_mark_cleanup_end() call below, since scrubbing
  // affects the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
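  // (update_heap_info_at_gc() records the heap occupancy figures that
  // the soft reference clearing policy consults.)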
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(after)");
  }

  g1h->verify_region_sets_optional();
  g1h->trace_heap_after_concurrent_cycle();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference
// processing. Uses the CMTask associated with a worker thread (for serial
// reference processing the CMTask for worker 0 is used) to preserve
// (mark) and trace referent objects.
//
// Using the CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage.
// Also, using the tasks' local queues removes the potential for the
// workers to interfere with each other, which could occur if they
// operated on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop( oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->worker_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call CMTask::do_marking_step() to
        // process these entries.
        //
        // We call CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a given worker thread (for serial
// reference processing the CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.
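// (The huge time target passed below effectively disables the
// time-based abort inside CMTask::do_marking_step(), so the do/while
// loop only terminates once the stacks are fully drained or the global
// mark stack overflows.)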
class G1CMDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
                               _task->worker_id(), BOOL_TO_STR(_is_serial));
      }

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  WorkGang* _workers;
  int _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask& _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() know
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() know
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  if (has_overflown()) {
    // Skip processing the discovered references if we have
    // overflown the global marking stack. Reference objects
    // only get discovered once so it is OK to not
    // de-populate the discovered reference lists. We could have,
    // but the only benefit would be that, when marking restarts,
    // fewer reference objects are discovered.
    return;
  }

  ResourceMark rm;
  HandleMark hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    if (G1Log::finer()) {
      gclog_or_tty->put(' ');
    }
    GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
    // JNI references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (e.g.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);

    // Parallel processing task executor.
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);

    // Set the concurrency level. The phase was already set prior to
    // executing the remark task.
    set_concurrency(active_workers);

    // Set the degree of MT processing here. If the discovery was done MT,
    // the number of threads involved during discovery could differ from
    // the number of active workers. This is OK as long as the discovered
    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    // Process the weak references.
    const ReferenceProcessorStats& stats =
        rp->process_discovered_references(&g1_is_alive,
                                          &g1_keep_alive,
                                          &g1_drain_mark_stack,
                                          executor,
                                          g1h->gc_timer_cm());
    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.
    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");

    if (_markStack.overflow()) {
      // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    assert(rp->num_q() == active_workers, "why not");

    rp->enqueue_discovered_references(executor);

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap = (CMBitMap*) temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  bool _is_serial;
public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_termination */,
                              _is_serial);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
    AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_concurrency_and_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    uint active_workers = 1;
    set_concurrency_and_phase(active_workers, false /* concurrent */);

    // Note - if there's no work gang then the VMThread will be
    // the thread to execute the remark - serially.
    // We have to pass true for the is_serial parameter so that
    // CMTask::do_marking_step() doesn't enter the sync
    // barriers in the event of an overflow. Doing so will
    // cause an assert that the current thread is not a
    // concurrent GC thread.
    CMRemarkTask remarkTask(this, active_workers, true /* is_serial */);
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            err_msg("Invariant: has_overflown = %s, num buffers = %d",
                    BOOL_TO_STR(has_overflown()),
                    satb_mq_set.completed_buffers_num()));

  print_stats();
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;
  bool _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption vo,
                           bool all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop( oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
      bool marked = _g1h->is_marked(obj, _vo);

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;
  bool _all;
  HeapRegion* _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption vo,
                              bool all,
                              HeapRegion* hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
    bool marked = _g1h->is_marked(o, _vo);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ?
" M" : ""); 2698 PrintReachableOopClosure oopCl(_out, _vo, _all); 2699 o->oop_iterate_no_header(&oopCl); 2700 } 2701 } 2702 }; 2703 2704 class PrintReachableRegionClosure : public HeapRegionClosure { 2705 private: 2706 G1CollectedHeap* _g1h; 2707 outputStream* _out; 2708 VerifyOption _vo; 2709 bool _all; 2710 2711 public: 2712 bool doHeapRegion(HeapRegion* hr) { 2713 HeapWord* b = hr->bottom(); 2714 HeapWord* e = hr->end(); 2715 HeapWord* t = hr->top(); 2716 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2717 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2718 "TAMS: "PTR_FORMAT, b, e, t, p); 2719 _out->cr(); 2720 2721 HeapWord* from = b; 2722 HeapWord* to = t; 2723 2724 if (to > from) { 2725 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2726 _out->cr(); 2727 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2728 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2729 _out->cr(); 2730 } 2731 2732 return false; 2733 } 2734 2735 PrintReachableRegionClosure(outputStream* out, 2736 VerifyOption vo, 2737 bool all) : 2738 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2739 }; 2740 2741 void ConcurrentMark::print_reachable(const char* str, 2742 VerifyOption vo, 2743 bool all) { 2744 gclog_or_tty->cr(); 2745 gclog_or_tty->print_cr("== Doing heap dump... "); 2746 2747 if (G1PrintReachableBaseFile == NULL) { 2748 gclog_or_tty->print_cr(" #### error: no base file defined"); 2749 return; 2750 } 2751 2752 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2753 (JVM_MAXPATHLEN - 1)) { 2754 gclog_or_tty->print_cr(" #### error: file name too long"); 2755 return; 2756 } 2757 2758 char file_name[JVM_MAXPATHLEN]; 2759 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2760 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2761 2762 fileStream fout(file_name); 2763 if (!fout.is_open()) { 2764 gclog_or_tty->print_cr(" #### error: could not open file"); 2765 return; 2766 } 2767 2768 outputStream* out = &fout; 2769 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2770 out->cr(); 2771 2772 out->print_cr("--- ITERATING OVER REGIONS"); 2773 out->cr(); 2774 PrintReachableRegionClosure rcl(out, vo, all); 2775 _g1h->heap_region_iterate(&rcl); 2776 out->cr(); 2777 2778 gclog_or_tty->print_cr(" done"); 2779 gclog_or_tty->flush(); 2780 } 2781 2782 #endif // PRODUCT 2783 2784 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2785 // Note we are overriding the read-only view of the prev map here, via 2786 // the cast. 2787 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2788 } 2789 2790 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2791 _nextMarkBitMap->clearRange(mr); 2792 } 2793 2794 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2795 clearRangePrevBitmap(mr); 2796 clearRangeNextBitmap(mr); 2797 } 2798 2799 HeapRegion* 2800 ConcurrentMark::claim_region(uint worker_id) { 2801 // "checkpoint" the finger 2802 HeapWord* finger = _finger; 2803 2804 // _heap_end will not change underneath our feet; it only changes at 2805 // yield points. 2806 while (finger < _heap_end) { 2807 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2808 2809 // Note on how this code handles humongous regions. In the 2810 // normal case the finger will reach the start of a "starts 2811 // humongous" (SH) region. 
    // Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord* bottom = curr_region->bottom();
    HeapWord* end = curr_region->end();
    HeapWord* limit = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             worker_id, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] we were successful with region = "
                               PTR_FORMAT, worker_id, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
                                 "returning it ", worker_id, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
                                 "returning NULL", worker_id, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               worker_id, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
enum VerifyNoCSetOopsPhase {
  VerifyNoCSetOopsStack,
  VerifyNoCSetOopsQueues,
  VerifyNoCSetOopsSATBCompleted,
  VerifyNoCSetOopsSATBThread
};

class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  VerifyNoCSetOopsPhase _phase;
  int _info;

  const char* phase_str() {
    switch (_phase) {
    case VerifyNoCSetOopsStack:         return "Stack";
    case VerifyNoCSetOopsQueues:        return "Queue";
    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
    default:                            ShouldNotReachHere();
    }
    return NULL;
  }

  void do_object_work(oop obj) {
    guarantee(!_g1h->obj_in_cs(obj),
              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
                      (void*) obj, phase_str(), _info));
  }

public:
  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }

  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
    _phase = phase;
    _info = info;
  }

  virtual void do_oop(oop* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    do_object_work(obj);
  }

  virtual void do_oop(narrowOop* p) {
    // We should not come across narrow oops while scanning marking
    // stacks and SATB buffers.
    ShouldNotReachHere();
  }

  virtual void do_object(oop obj) {
    do_object_work(obj);
  }
};

void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
                                         bool verify_enqueued_buffers,
                                         bool verify_thread_buffers,
                                         bool verify_fingers) {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->mark_in_progress()) {
    return;
  }

  VerifyNoCSetOopsClosure cl;

  if (verify_stacks) {
    // Verify entries on the global mark stack
    cl.set_phase(VerifyNoCSetOopsStack);
    _markStack.oops_do(&cl);

    // Verify entries on the task queues
    for (uint i = 0; i < _max_worker_id; i += 1) {
      cl.set_phase(VerifyNoCSetOopsQueues, i);
      CMTaskQueue* queue = _task_queues->queue(i);
      queue->oops_do(&cl);
    }
  }

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

  // Verify entries on the enqueued SATB buffers
  if (verify_enqueued_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
    satb_qs.iterate_completed_buffers_read_only(&cl);
  }

  // Verify entries on the per-thread SATB buffers
  if (verify_thread_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBThread);
    satb_qs.iterate_thread_buffers_read_only(&cl);
  }

  if (verify_fingers) {
    // Verify the global finger
    HeapWord* global_finger = finger();
    if (global_finger != NULL && global_finger < _heap_end) {
      // The global finger always points to a heap region boundary.
      // We use heap_region_containing_raw() to get the containing
      // region given that the global finger could be pointing to a
      // free region which subsequently becomes continues humongous.
      // If that happens, heap_region_containing() will return the
      // bottom of the corresponding starts humongous region and the
      // check below will not hold any more.
      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
      guarantee(global_finger == global_hr->bottom(),
                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
                        global_finger, HR_FORMAT_PARAMS(global_hr)));
    }

    // Verify the task fingers
    assert(parallel_marking_threads() <= _max_worker_id, "sanity");
    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
      CMTask* task = _tasks[i];
      HeapWord* task_finger = task->finger();
      if (task_finger != NULL && task_finger < _heap_end) {
        // See above note on the global finger verification.
        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
        guarantee(task_finger == task_hr->bottom() ||
                  !task_hr->in_collection_set(),
                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
                          task_finger, HR_FORMAT_PARAMS(task_hr)));
      }
    }
  }
}
#endif // PRODUCT

// Aggregate the counting data that was constructed concurrently
// with marking.
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have their bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, limit, hr->top(), hr->end()));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
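    // (Region boundaries are card aligned: a region's bottom() is
    // aligned to the heap region size, which is a multiple of the card
    // size - hence the is_card_aligned(start) part of the assert below.)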
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could be actually just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrs_index = hr->hrs_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrs_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
    hr->add_to_marked_bytes(marked_bytes);

    // Next heap region
    return false;
  }
};

class G1AggregateCountDataTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _cm_card_bm;
  uint _max_worker_id;
  int _active_workers;

public:
  G1AggregateCountDataTask(G1CollectedHeap* g1h,
                           ConcurrentMark* cm,
                           BitMap* cm_card_bm,
                           uint max_worker_id,
                           int n_workers) :
    AbstractGangTask("Count Aggregation"),
    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
    _max_worker_id(max_worker_id),
    _active_workers(n_workers) { }

  void work(uint worker_id) {
    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
                                            _active_workers,
                                            HeapRegion::AggregateCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&cl);
    }
  }
};


void ConcurrentMark::aggregate_count_data() {
  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                        _g1h->workers()->active_workers() :
                        1);

  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                           _max_worker_id, n_workers);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");
    _g1h->set_par_threads(n_workers);
    _g1h->workers()->run_task(&g1_par_agg_task);
    _g1h->set_par_threads(0);

    assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
           "sanity check");
    _g1h->reset_heap_region_claim_values();
  } else {
    g1_par_agg_task.work(0);
  }
}

// Clear the per-worker arrays used to store the per-region counting data
void ConcurrentMark::clear_all_count_data() {
  // Clear the global card bitmap - it will be filled during
  // liveness count aggregation (during remark) and the
  // final counting task.
  _card_bm.clear();

  // Clear the global region bitmap - it will be filled as part
  // of the final counting task.
  _region_bm.clear();

  uint max_regions = _g1h->max_regions();
  assert(_max_worker_id > 0, "uninitialized");

  for (uint i = 0; i < _max_worker_id; i += 1) {
    BitMap* task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_array_for(i);

    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
    assert(marked_bytes_array != NULL, "uninitialized");

    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
    task_card_bm->clear();
  }
}

void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Clear the liveness counting data
  clear_all_count_data();
  // Empty mark stack
  reset_marking_state();
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);

  _g1h->trace_heap_after_concurrent_cycle();
  _g1h->register_concurrent_cycle_end();
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ?
_total_rs_scrub_time * 1000.0 / 3273 (double)_cleanup_times.num() 3274 : 0.0)); 3275 } 3276 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3277 (_init_times.sum() + _remark_times.sum() + 3278 _cleanup_times.sum())/1000.0); 3279 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3280 "(%8.2f s marking).", 3281 cmThread()->vtime_accum(), 3282 cmThread()->vtime_mark_accum()); 3283 } 3284 3285 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3286 if (use_parallel_marking_threads()) { 3287 _parallel_workers->print_worker_threads_on(st); 3288 } 3289 } 3290 3291 void ConcurrentMark::print_on_error(outputStream* st) const { 3292 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3293 _prevMarkBitMap, _nextMarkBitMap); 3294 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3295 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3296 } 3297 3298 // We take a break if someone is trying to stop the world. 3299 bool ConcurrentMark::do_yield_check(uint worker_id) { 3300 if (should_yield()) { 3301 if (worker_id == 0) { 3302 _g1h->g1_policy()->record_concurrent_pause(); 3303 } 3304 cmThread()->yield(); 3305 return true; 3306 } else { 3307 return false; 3308 } 3309 } 3310 3311 bool ConcurrentMark::should_yield() { 3312 return cmThread()->should_yield(); 3313 } 3314 3315 bool ConcurrentMark::containing_card_is_marked(void* p) { 3316 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3317 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3318 } 3319 3320 bool ConcurrentMark::containing_cards_are_marked(void* start, 3321 void* last) { 3322 return containing_card_is_marked(start) && 3323 containing_card_is_marked(last); 3324 } 3325 3326 #ifndef PRODUCT 3327 // for debugging purposes 3328 void ConcurrentMark::print_finger() { 3329 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3330 _heap_start, _heap_end, _finger); 3331 for (uint i = 0; i < _max_worker_id; ++i) { 3332 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger()); 3333 } 3334 gclog_or_tty->print_cr(""); 3335 } 3336 #endif 3337 3338 void CMTask::scan_object(oop obj) { 3339 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3340 3341 if (_cm->verbose_high()) { 3342 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3343 _worker_id, (void*) obj); 3344 } 3345 3346 size_t obj_size = obj->size(); 3347 _words_scanned += obj_size; 3348 3349 obj->oop_iterate(_cm_oop_closure); 3350 statsOnly( ++_objs_scanned ); 3351 check_limits(); 3352 } 3353 3354 // Closure for iteration over bitmaps 3355 class CMBitMapClosure : public BitMapClosure { 3356 private: 3357 // the bitmap that is being iterated over 3358 CMBitMap* _nextMarkBitMap; 3359 ConcurrentMark* _cm; 3360 CMTask* _task; 3361 3362 public: 3363 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3364 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3365 3366 bool do_bit(size_t offset) { 3367 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3368 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3369 assert( addr < _cm->finger(), "invariant"); 3370 3371 statsOnly( _task->increase_objs_found_on_bitmap() ); 3372 assert(addr >= _task->finger(), "invariant"); 3373 3374 // We move that task's local finger along. 
    _task->move_finger_to(addr);

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out NULL regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
                           _worker_id, hr);
  }

  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _worker_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
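    // Concretely (an illustrative example, not from the original
    // comments): suppose we cached _region_limit == bottom + 2K words,
    // an evacuation pause then empties the region (NTAMS == bottom),
    // the mutator allocates 1K words, and the region becomes a GC
    // alloc region (NTAMS == top == bottom + 1K). Re-reading the limit
    // now yields bottom + 1K, which is below the cached _region_limit,
    // i.e. exactly the "limit < _region_limit" case handled here.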
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
                           _worker_id, _curr_region);
  }
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Set these three fields to values that indicate that we're not
  // holding on to a region.
  _curr_region = NULL;
  _finger = NULL;
  _region_limit = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes = 0;
  _local_pops = 0;
  _local_max_size = 0;
  _objs_scanned = 0;
  _global_pushes = 0;
  _global_pops = 0;
  _global_max_size = 0;
  _global_transfers_to = 0;
  _global_transfers_from = 0;
  _regions_claimed = 0;
  _objs_found_on_bitmap = 0;
  _satb_buffers_processed = 0;
  _steal_attempts = 0;
  _steals = 0;
  _aborted = 0;
  _aborted_overflow = 0;
  _aborted_cm_aborted = 0;
  _aborted_yield = 0;
  _aborted_timed_out = 0;
  _aborted_satb = 0;
  _aborted_termination = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following:

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for a Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
                           "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
                           _worker_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
                             _worker_id);
    }
    // we do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
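  //
  // A worked example (derived purely from the formulas in
  // recalculate_limits() above and the assignments below, no extra
  // semantics): recalculate_limits() sets
  //   _words_scanned_limit = _words_scanned + words_scanned_period
  // and decrease_limits() pulls that back by 3 * words_scanned_period / 4,
  // so only words_scanned_period / 4 words of scanning headroom remain
  // before reached_limit() triggers the next regular_clock_call().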

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
                               _worker_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
                               _worker_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
                             _worker_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
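  // For instance (illustrative numbers only, not values asserted by
  // this file): with a task queue whose max_elems() is 16*1024 and
  // GCDrainStackTargetSize at 64, a partial drain stops once the queue
  // is down to MIN2((size_t)16*1024/3, 64) = 64 entries, while a total
  // drain (partially == false) uses a target of 0 and empties it.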
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] draining local queue, target size = "SIZE_FORMAT,
                             _worker_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
                             _worker_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] draining global stack, target size "SIZE_FORMAT,
                             _worker_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
                             _worker_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue set makes several assumptions about whether to call
// the par or non-par versions of its methods; this is why some of the
// code is replicated. We should really get rid of the single-threaded
// version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_worker_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_worker_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_worker_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // Again, this was a potentially expensive operation, so decrease the
  // limits to get the regular clock call early.
  decrease_limits();
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
                         _worker_id, _calls);
  gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
  gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

    (1) Marking Bitmap. If there are gray objects that appear only
    on the bitmap (this happens either when dealing with an overflow
    or when the initial marking phase has simply marked the roots
    and didn't push them on the stack), then tasks claim heap
    regions whose bitmap they then scan to find gray objects. A
    global finger indicates where the end of the last claimed region
    is. A local finger indicates how far into the region a task has
    scanned. The two fingers are used to determine how to gray an
    object (i.e. whether simply marking it is OK, as it will be
    visited by a task in the future, or whether it also needs to be
    pushed on a stack).

    (2) Local Queue. The local queue of the task which is accessed
    reasonably efficiently by the task. Other tasks can steal from
    it when they run out of work. Throughout the marking phase, a
    task attempts to keep its local queue short but not totally
    empty, so that entries are available for stealing by other
    tasks. Only when there is no more work will a task totally
    drain its local queue.

    (3) Global Mark Stack. This handles local queue overflow. During
    marking only sets of entries are moved between it and the local
    queues, as access to it requires a mutex, and finer-grained
    interaction with it might cause contention. If it overflows,
    then the marking phase should restart and iterate over the
    bitmap to identify gray objects. Throughout the marking phase,
    tasks attempt to keep the global mark stack at a small length
    but not totally empty, so that entries are available for popping
    by other tasks. Only when there is no more work will tasks
    totally drain the global mark stack.

    (4) SATB Buffer Queue. This is where completed SATB buffers are
    made available. Buffers are regularly removed from this queue
    and scanned for roots, so that the queue doesn't get too
    long. During remark, all completed buffers are processed, as
    well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (on the global stack) has been
    triggered. Before the task aborts, it will actually sync up with
    the other tasks to ensure that all the marking data structures
    (local queues, stacks, fingers etc.) are re-initialized so that
    when do_marking_step() completes, the marking phase can
    immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and do the yield
    right at the end of do_marking_step(). This saves us a lot of
    hassle: by yielding we might allow a Full GC; if that happens,
    objects will be compacted underneath our feet, the heap might
    shrink, etc. We avoid having to check for all this throughout
    the code by simply aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reaches a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _worker_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // on the stack.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
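      // For example (illustrative): if a previous invocation aborted
      // after scanning up to some address A within [bottom, NTAMS),
      // then _finger == A now and mr below becomes [A, _region_limit),
      // so the already-scanned prefix of the region is not rescanned.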
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "HR_FORMAT,
                               _worker_id, _finger, _region_limit,
                               HR_FORMAT_PARAMS(_curr_region));
      }

      assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%u] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _worker_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
                                 _worker_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task; it
      // will be restarted and hopefully we can then find more things
      // to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] apparently there is more work to do",
                               _worker_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was allocated on the stack in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
      }

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialise our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
                               _worker_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value, and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix), which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
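//
// For example (an illustrative composition, mirroring the print_cr()
// calls further down): adjacent string literals are concatenated by
// the compiler, so the macros defined below simply splice together
// into a single format string:
//
//   _out->print_cr(G1PPRL_LINE_PREFIX
//                  G1PPRL_TYPE_FORMAT     // " %-4s"
//                  G1PPRL_BYTE_FORMAT,    // " "SIZE_FORMAT_W(9)
//                  type, used_bytes);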
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT     " %-4s"
#define G1PPRL_TYPE_H_FORMAT   " %4s"
#define G1PPRL_BYTE_FORMAT     " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT   " %9s"
#define G1PPRL_DOUBLE_FORMAT   " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff", "remset");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", "(bytes)");
}

// Takes a reference to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
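//
// For example (illustrative numbers): with GrainBytes == 1M and
// _hum_used_bytes == 2.5M for a humongous series spanning three
// regions, the three successive get_hum_bytes(&_hum_used_bytes) calls
// made while visiting those regions return 1M, 1M and 0.5M, leaving
// the field at 0 for the next series.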
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  size_t remset_bytes = r->rem_set()->mem_size();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes = capacity_bytes;
    _hum_used_bytes = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, remset_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usages to remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes));
  _out->cr();
}
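
// Typical usage of this closure (a sketch, based on how the cleanup
// code elsewhere in this file drives it; the phase name below is
// illustrative):
//
//   if (G1PrintRegionLivenessInfo) {
//     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//     _g1h->heap_region_iterate(&cl);
//   }
//
// The constructor prints the table header, doHeapRegion() prints one
// line per region as the heap is iterated, and the destructor prints
// the summary footer.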