/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
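  // One bitmap bit covers (1 << _shifter) heap words, so the finest
  // granule the bitmap can express is HeapWordSize << _shifter bytes;
  // rounding up keeps us from starting the search inside a granule.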
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size() >> LogHeapWordSize;
}
#endif

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size() / HeapWordSize; // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
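  // Illustrative sizing (assuming 8-byte heap words and _shifter == 0): a
  // 1 GB heap has 128M heap words and so needs 128M bits, i.e. 16 MB of
  // backing store, which is what the (_bmWordSize >> (_shifter +
  // LogBitsPerByte)) term above computes before alignment.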
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflown the marking stack during marking.
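  // Capacity doubles on each expansion, up to MarkStackSizeMax. The new
  // space is reserved before the old backing store is released, so a
  // failed reservation leaves the current stack intact.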
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
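  // Holding ParGCRareEvent_lock, we can publish the new top first and then
  // fill the reserved slots [start, next_index) without further atomics.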
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
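  // Double-checked claim: peek at _next_survivor without the lock, then
  // re-read it under RootRegionScan_lock before actually advancing it.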
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(MinObjAlignment - 1),
  _markBitMap2(MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
            "than ParallelGCThreads (" UINT32_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = (uint) ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (uint) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
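  // Illustrative arithmetic (assuming the default 512-byte cards, i.e.
  // card_shift == 9): a heap based at 0x0000000700000000 gives a bottom
  // card number of 0x3800000, so subtracting this bias from a global card
  // number yields that card's index in the counting bitmaps.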
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
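  // (set_phase() runs either at a safepoint or before this phase's marking
  // workers have been started, so iterating the full task array without
  // synchronization is safe here.)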
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur, next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // let the task associated with worker 0 do this
  if (worker_id == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow.
    // During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    reset_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
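  // Each worker repeatedly claims one region at a time via claim_next(),
  // so the root regions are load-balanced without any explicit chunking.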
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (parallel_marking_threads() > 0) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
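// It walks the marking bitmap from bottom() up to NTAMS, sums the sizes
// of the marked objects into _region_marked_bytes, and mirrors the cards
// they span into the expected card bitmap, so a verification caller can
// compare these against the concurrently accumulated counting data.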
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
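    // (A region with zero marked bytes may already have had its bit set
    // above, via its allocated-since-marking [ntams, top) portion.)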
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
1626 if (G1CollectedHeap::use_parallel_gc_threads()) { 1627 assert( _g1h->workers()->active_workers() > 0, 1628 "Should have been previously set"); 1629 _n_workers = _g1h->workers()->active_workers(); 1630 } else { 1631 _n_workers = 1; 1632 } 1633 1634 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1635 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1636 1637 _verbose = _cm->verbose_medium(); 1638 } 1639 1640 void work(uint worker_id) { 1641 assert(worker_id < _n_workers, "invariant"); 1642 1643 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1644 _actual_region_bm, _actual_card_bm, 1645 _expected_region_bm, 1646 _expected_card_bm, 1647 _verbose); 1648 1649 if (G1CollectedHeap::use_parallel_gc_threads()) { 1650 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1651 worker_id, 1652 _n_workers, 1653 HeapRegion::VerifyCountClaimValue); 1654 } else { 1655 _g1h->heap_region_iterate(&verify_cl); 1656 } 1657 1658 Atomic::add(verify_cl.failures(), &_failures); 1659 } 1660 1661 int failures() const { return _failures; } 1662 }; 1663 1664 // Closure that finalizes the liveness counting data. 1665 // Used during the cleanup pause. 1666 // Sets the bits corresponding to the interval [NTAMS, top] 1667 // (which contains the implicitly live objects) in the 1668 // card liveness bitmap. Also sets the bit for each region 1669 // containing live data in the region liveness bitmap. 1670 1671 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1672 public: 1673 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1674 BitMap* region_bm, 1675 BitMap* card_bm) : 1676 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1677 1678 bool doHeapRegion(HeapRegion* hr) { 1679 1680 if (hr->continuesHumongous()) { 1681 // We will ignore these here and process them when their 1682 // associated "starts humongous" region is processed (see 1683 // set_bit_for_heap_region()). Note that we cannot rely on their 1684 // associated "starts humongous" region to have their bit set to 1685 // 1 since, due to the region chunking in the parallel region 1686 // iteration, a "continues humongous" region might be visited 1687 // before its associated "starts humongous". 1688 return false; 1689 } 1690 1691 HeapWord* ntams = hr->next_top_at_mark_start(); 1692 HeapWord* top = hr->top(); 1693 1694 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1695 1696 // Mark the allocated-since-marking portion... 1697 if (ntams < top) { 1698 // This definitely means the region has live objects. 1699 set_bit_for_region(hr); 1700 1701 // Now set the bits in the card bitmap for [ntams, top) 1702 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1703 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1704 1705 // Note: if we're looking at the last region in the heap - top 1706 // could actually be just beyond the end of the heap; end_idx 1707 // will then correspond to a (non-existent) card that is also 1708 // just beyond the heap.
1709 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1710 // end of object is not card aligned - increment to cover 1711 // all the cards spanned by the object 1712 end_idx += 1; 1713 } 1714 1715 assert(end_idx <= _card_bm->size(), 1716 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1717 end_idx, _card_bm->size())); 1718 assert(start_idx < _card_bm->size(), 1719 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1720 start_idx, _card_bm->size())); 1721 1722 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1723 } 1724 1725 // Set the bit for the region if it contains live data 1726 if (hr->next_marked_bytes() > 0) { 1727 set_bit_for_region(hr); 1728 } 1729 1730 return false; 1731 } 1732 }; 1733 1734 class G1ParFinalCountTask: public AbstractGangTask { 1735 protected: 1736 G1CollectedHeap* _g1h; 1737 ConcurrentMark* _cm; 1738 BitMap* _actual_region_bm; 1739 BitMap* _actual_card_bm; 1740 1741 uint _n_workers; 1742 1743 public: 1744 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1745 : AbstractGangTask("G1 final counting"), 1746 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1747 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1748 _n_workers(0) { 1749 // Use the value already set as the number of active threads 1750 // in the call to run_task(). 1751 if (G1CollectedHeap::use_parallel_gc_threads()) { 1752 assert( _g1h->workers()->active_workers() > 0, 1753 "Should have been previously set"); 1754 _n_workers = _g1h->workers()->active_workers(); 1755 } else { 1756 _n_workers = 1; 1757 } 1758 } 1759 1760 void work(uint worker_id) { 1761 assert(worker_id < _n_workers, "invariant"); 1762 1763 FinalCountDataUpdateClosure final_update_cl(_g1h, 1764 _actual_region_bm, 1765 _actual_card_bm); 1766 1767 if (G1CollectedHeap::use_parallel_gc_threads()) { 1768 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1769 worker_id, 1770 _n_workers, 1771 HeapRegion::FinalCountClaimValue); 1772 } else { 1773 _g1h->heap_region_iterate(&final_update_cl); 1774 } 1775 } 1776 }; 1777 1778 class G1ParNoteEndTask; 1779 1780 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1781 G1CollectedHeap* _g1; 1782 int _worker_num; 1783 size_t _max_live_bytes; 1784 uint _regions_claimed; 1785 size_t _freed_bytes; 1786 FreeRegionList* _local_cleanup_list; 1787 OldRegionSet* _old_proxy_set; 1788 HumongousRegionSet* _humongous_proxy_set; 1789 HRRSCleanupTask* _hrrs_cleanup_task; 1790 double _claimed_region_time; 1791 double _max_region_time; 1792 1793 public: 1794 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1795 int worker_num, 1796 FreeRegionList* local_cleanup_list, 1797 OldRegionSet* old_proxy_set, 1798 HumongousRegionSet* humongous_proxy_set, 1799 HRRSCleanupTask* hrrs_cleanup_task) : 1800 _g1(g1), _worker_num(worker_num), 1801 _max_live_bytes(0), _regions_claimed(0), 1802 _freed_bytes(0), 1803 _claimed_region_time(0.0), _max_region_time(0.0), 1804 _local_cleanup_list(local_cleanup_list), 1805 _old_proxy_set(old_proxy_set), 1806 _humongous_proxy_set(humongous_proxy_set), 1807 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1808 1809 size_t freed_bytes() { return _freed_bytes; } 1810 1811 bool doHeapRegion(HeapRegion *hr) { 1812 if (hr->continuesHumongous()) { 1813 return false; 1814 } 1815 // We use a claim value of zero here because all regions 1816 // were claimed with value 1 in the FinalCount task. 
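// For reference, the claim-value sequence actually asserted during
// the cleanup pause (see the check_heap_region_claim_values() calls
// in ConcurrentMark::cleanup() below) is, in order:
//
//   InitialClaimValue -> FinalCountClaimValue  // G1ParFinalCountTask
//                     -> VerifyCountClaimValue // only if VerifyDuringGC
//                     -> NoteEndClaimValue     // G1ParNoteEndTask (this one)
//                     -> ScrubRemSetClaimValue // G1ParScrubRemSetTask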
1817 _g1->reset_gc_time_stamps(hr); 1818 double start = os::elapsedTime(); 1819 _regions_claimed++; 1820 hr->note_end_of_marking(); 1821 _max_live_bytes += hr->max_live_bytes(); 1822 _g1->free_region_if_empty(hr, 1823 &_freed_bytes, 1824 _local_cleanup_list, 1825 _old_proxy_set, 1826 _humongous_proxy_set, 1827 _hrrs_cleanup_task, 1828 true /* par */); 1829 double region_time = (os::elapsedTime() - start); 1830 _claimed_region_time += region_time; 1831 if (region_time > _max_region_time) { 1832 _max_region_time = region_time; 1833 } 1834 return false; 1835 } 1836 1837 size_t max_live_bytes() { return _max_live_bytes; } 1838 uint regions_claimed() { return _regions_claimed; } 1839 double claimed_region_time_sec() { return _claimed_region_time; } 1840 double max_region_time_sec() { return _max_region_time; } 1841 }; 1842 1843 class G1ParNoteEndTask: public AbstractGangTask { 1844 friend class G1NoteEndOfConcMarkClosure; 1845 1846 protected: 1847 G1CollectedHeap* _g1h; 1848 size_t _max_live_bytes; 1849 size_t _freed_bytes; 1850 FreeRegionList* _cleanup_list; 1851 1852 public: 1853 G1ParNoteEndTask(G1CollectedHeap* g1h, 1854 FreeRegionList* cleanup_list) : 1855 AbstractGangTask("G1 note end"), _g1h(g1h), 1856 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1857 1858 void work(uint worker_id) { 1859 double start = os::elapsedTime(); 1860 FreeRegionList local_cleanup_list("Local Cleanup List"); 1861 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1862 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1863 HRRSCleanupTask hrrs_cleanup_task; 1864 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1865 &old_proxy_set, 1866 &humongous_proxy_set, 1867 &hrrs_cleanup_task); 1868 if (G1CollectedHeap::use_parallel_gc_threads()) { 1869 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1870 _g1h->workers()->active_workers(), 1871 HeapRegion::NoteEndClaimValue); 1872 } else { 1873 _g1h->heap_region_iterate(&g1_note_end); 1874 } 1875 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1876 1877 // Now update the lists 1878 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1879 NULL /* free_list */, 1880 &old_proxy_set, 1881 &humongous_proxy_set, 1882 true /* par */); 1883 { 1884 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1885 _max_live_bytes += g1_note_end.max_live_bytes(); 1886 _freed_bytes += g1_note_end.freed_bytes(); 1887 1888 // If we iterate over the global cleanup list at the end of 1889 // cleanup to do this printing we cannot guarantee that we only 1890 // generate output for the newly-reclaimed regions (the list 1891 // might not be empty at the beginning of cleanup; we might 1892 // still be working on its previous contents). So we do the 1893 // printing here, before we append the new regions to the global 1894 // cleanup list.
1895 1896 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1897 if (hr_printer->is_active()) { 1898 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1899 while (iter.more_available()) { 1900 HeapRegion* hr = iter.get_next(); 1901 hr_printer->cleanup(hr); 1902 } 1903 } 1904 1905 _cleanup_list->add_as_tail(&local_cleanup_list); 1906 assert(local_cleanup_list.is_empty(), "post-condition"); 1907 1908 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1909 } 1910 } 1911 size_t max_live_bytes() { return _max_live_bytes; } 1912 size_t freed_bytes() { return _freed_bytes; } 1913 }; 1914 1915 class G1ParScrubRemSetTask: public AbstractGangTask { 1916 protected: 1917 G1RemSet* _g1rs; 1918 BitMap* _region_bm; 1919 BitMap* _card_bm; 1920 public: 1921 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1922 BitMap* region_bm, BitMap* card_bm) : 1923 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1924 _region_bm(region_bm), _card_bm(card_bm) { } 1925 1926 void work(uint worker_id) { 1927 if (G1CollectedHeap::use_parallel_gc_threads()) { 1928 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1929 HeapRegion::ScrubRemSetClaimValue); 1930 } else { 1931 _g1rs->scrub(_region_bm, _card_bm); 1932 } 1933 } 1934 1935 }; 1936 1937 void ConcurrentMark::cleanup() { 1938 // world is stopped at this checkpoint 1939 assert(SafepointSynchronize::is_at_safepoint(), 1940 "world should be stopped"); 1941 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1942 1943 // If a full collection has happened, we shouldn't do this. 1944 if (has_aborted()) { 1945 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1946 return; 1947 } 1948 1949 HRSPhaseSetter x(HRSPhaseCleanup); 1950 g1h->verify_region_sets_optional(); 1951 1952 if (VerifyDuringGC) { 1953 HandleMark hm; // handle scope 1954 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1955 Universe::heap()->prepare_for_verify(); 1956 Universe::verify(/* silent */ false, 1957 /* option */ VerifyOption_G1UsePrevMarking); 1958 } 1959 g1h->check_bitmaps("Cleanup Start"); 1960 1961 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1962 g1p->record_concurrent_mark_cleanup_start(); 1963 1964 double start = os::elapsedTime(); 1965 1966 HeapRegionRemSet::reset_for_cleanup_tasks(); 1967 1968 uint n_workers; 1969 1970 // Do counting once more with the world stopped for good measure. 1971 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1972 1973 if (G1CollectedHeap::use_parallel_gc_threads()) { 1974 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1975 "sanity check"); 1976 1977 g1h->set_par_threads(); 1978 n_workers = g1h->n_par_threads(); 1979 assert(g1h->n_par_threads() == n_workers, 1980 "Should not have been reset"); 1981 g1h->workers()->run_task(&g1_par_count_task); 1982 // Done with the parallel phase so reset to 0. 1983 g1h->set_par_threads(0); 1984 1985 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1986 "sanity check"); 1987 } else { 1988 n_workers = 1; 1989 g1_par_count_task.work(0); 1990 } 1991 1992 if (VerifyDuringGC) { 1993 // Verify that the counting data accumulated during marking matches 1994 // that calculated by walking the marking bitmap. 
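// The check is one-directional by design: VerifyLiveObjectDataHRClosure
// above only flags bits that are set in the recomputed (expected)
// bitmaps but clear in the actual ones, i.e. for every region r and
// card c it requires
//
//   exp_region_bm[r] implies act_region_bm[r]
//   exp_card_bm[c]   implies act_card_bm[c]
//
// and, analogously, expected marked bytes <= actual marked bytes.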
1995 1996 // Bitmaps to hold expected values 1997 BitMap expected_region_bm(_region_bm.size(), false); 1998 BitMap expected_card_bm(_card_bm.size(), false); 1999 2000 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2001 &_region_bm, 2002 &_card_bm, 2003 &expected_region_bm, 2004 &expected_card_bm); 2005 2006 if (G1CollectedHeap::use_parallel_gc_threads()) { 2007 g1h->set_par_threads((int)n_workers); 2008 g1h->workers()->run_task(&g1_par_verify_task); 2009 // Done with the parallel phase so reset to 0. 2010 g1h->set_par_threads(0); 2011 2012 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2013 "sanity check"); 2014 } else { 2015 g1_par_verify_task.work(0); 2016 } 2017 2018 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2019 } 2020 2021 size_t start_used_bytes = g1h->used(); 2022 g1h->set_marking_complete(); 2023 2024 double count_end = os::elapsedTime(); 2025 double this_final_counting_time = (count_end - start); 2026 _total_counting_time += this_final_counting_time; 2027 2028 if (G1PrintRegionLivenessInfo) { 2029 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2030 _g1h->heap_region_iterate(&cl); 2031 } 2032 2033 // Install newly created mark bitmap as "prev". 2034 swapMarkBitMaps(); 2035 2036 g1h->reset_gc_time_stamp(); 2037 2038 // Note end of marking in all heap regions. 2039 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2040 if (G1CollectedHeap::use_parallel_gc_threads()) { 2041 g1h->set_par_threads((int)n_workers); 2042 g1h->workers()->run_task(&g1_par_note_end_task); 2043 g1h->set_par_threads(0); 2044 2045 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2046 "sanity check"); 2047 } else { 2048 g1_par_note_end_task.work(0); 2049 } 2050 g1h->check_gc_time_stamps(); 2051 2052 if (!cleanup_list_is_empty()) { 2053 // The cleanup list is not empty, so we'll have to process it 2054 // concurrently. Notify anyone else that might be wanting free 2055 // regions that there will be more free regions coming soon. 2056 g1h->set_free_regions_coming(); 2057 } 2058 2059 // Note: we need to do the remembered set scrubbing before the 2060 // record_concurrent_mark_cleanup_end() call below, since it affects the metric by which we sort the heap regions. 2061 if (G1ScrubRemSets) { 2062 double rs_scrub_start = os::elapsedTime(); 2063 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2064 if (G1CollectedHeap::use_parallel_gc_threads()) { 2065 g1h->set_par_threads((int)n_workers); 2066 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2067 g1h->set_par_threads(0); 2068 2069 assert(g1h->check_heap_region_claim_values( 2070 HeapRegion::ScrubRemSetClaimValue), 2071 "sanity check"); 2072 } else { 2073 g1_par_scrub_rs_task.work(0); 2074 } 2075 2076 double rs_scrub_end = os::elapsedTime(); 2077 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2078 _total_rs_scrub_time += this_rs_scrub_time; 2079 } 2080 2081 // this will also free any regions totally full of garbage objects, 2082 // and sort the regions. 2083 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2084 2085 // Statistics. 2086 double end = os::elapsedTime(); 2087 _cleanup_times.add((end - start) * 1000.0); 2088 2089 if (G1Log::fine()) { 2090 g1h->print_size_transition(gclog_or_tty, 2091 start_used_bytes, 2092 g1h->used(), 2093 g1h->capacity()); 2094 } 2095 2096 // Cleanup will have freed any regions completely full of garbage. 2097 // Update the soft reference policy with the new heap occupancy.
2098 Universe::update_heap_info_at_gc(); 2099 2100 // We need to make this a "collection" so any collection pause that 2101 // races with it goes around and waits for completeCleanup to finish. 2102 g1h->increment_total_collections(); 2103 2104 // We reclaimed old regions so we should calculate the sizes to make 2105 // sure we update the old gen/space data. 2106 g1h->g1mm()->update_sizes(); 2107 2108 if (VerifyDuringGC) { 2109 HandleMark hm; // handle scope 2110 gclog_or_tty->print(" VerifyDuringGC:(after)"); 2111 Universe::heap()->prepare_for_verify(); 2112 Universe::verify(/* silent */ false, 2113 /* option */ VerifyOption_G1UsePrevMarking); 2114 } 2115 g1h->check_bitmaps("Cleanup End"); 2116 2117 g1h->verify_region_sets_optional(); 2118 } 2119 2120 void ConcurrentMark::completeCleanup() { 2121 if (has_aborted()) return; 2122 2123 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2124 2125 _cleanup_list.verify_optional(); 2126 FreeRegionList tmp_free_list("Tmp Free List"); 2127 2128 if (G1ConcRegionFreeingVerbose) { 2129 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2130 "cleanup list has %u entries", 2131 _cleanup_list.length()); 2132 } 2133 2134 // No one else should be accessing the _cleanup_list at this point, 2135 // so it's not necessary to take any locks. 2136 while (!_cleanup_list.is_empty()) { 2137 HeapRegion* hr = _cleanup_list.remove_head(); 2138 assert(hr != NULL, "the list was not empty"); 2139 hr->par_clear(); 2140 tmp_free_list.add_as_tail(hr); 2141 2142 // Instead of adding one region at a time to the secondary_free_list, 2143 // we accumulate them in the local list and move them a few at a 2144 // time. This also cuts down on the number of notify_all() calls 2145 // we do during this process. We'll also append the local list when 2146 // _cleanup_list is empty (which means we just removed the last 2147 // region from the _cleanup_list).
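// To illustrate, taking G1SecondaryFreeListAppendLength to be 5, the
// branch below transfers the accumulated regions after every 5th one
// and once more when the cleanup list finally empties:
//
//   if ((tmp_free_list.length() % 5 == 0) || _cleanup_list.is_empty()) {
//     // append tmp_free_list to the secondary_free_list under
//     // SecondaryFreeList_lock and notify_all() any waiters
//   }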
2148 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2149 _cleanup_list.is_empty()) { 2150 if (G1ConcRegionFreeingVerbose) { 2151 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2152 "appending %u entries to the secondary_free_list, " 2153 "cleanup list still has %u entries", 2154 tmp_free_list.length(), 2155 _cleanup_list.length()); 2156 } 2157 2158 { 2159 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2160 g1h->secondary_free_list_add_as_tail(&tmp_free_list); 2161 SecondaryFreeList_lock->notify_all(); 2162 } 2163 2164 if (G1StressConcRegionFreeing) { 2165 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2166 os::sleep(Thread::current(), (jlong) 1, false); 2167 } 2168 } 2169 } 2170 } 2171 assert(tmp_free_list.is_empty(), "post-condition"); 2172 } 2173 2174 // Support closures for reference processing in G1 2175 2176 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2177 HeapWord* addr = (HeapWord*)obj; 2178 return addr != NULL && 2179 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2180 } 2181 2182 class G1CMKeepAliveClosure: public ExtendedOopClosure { 2183 G1CollectedHeap* _g1; 2184 ConcurrentMark* _cm; 2185 public: 2186 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) : 2187 _g1(g1), _cm(cm) { 2188 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id"); 2189 } 2190 2191 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2192 virtual void do_oop( oop* p) { do_oop_work(p); } 2193 2194 template <class T> void do_oop_work(T* p) { 2195 oop obj = oopDesc::load_decode_heap_oop(p); 2196 HeapWord* addr = (HeapWord*)obj; 2197 2198 if (_cm->verbose_high()) { 2199 gclog_or_tty->print_cr("\t[0] we're looking at location " 2200 "*"PTR_FORMAT" = "PTR_FORMAT, 2201 p, (void*) obj); 2202 } 2203 2204 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { 2205 _cm->mark_and_count(obj); 2206 _cm->mark_stack_push(obj); 2207 } 2208 } 2209 }; 2210 2211 class G1CMDrainMarkingStackClosure: public VoidClosure { 2212 ConcurrentMark* _cm; 2213 CMMarkStack* _markStack; 2214 G1CMKeepAliveClosure* _oopClosure; 2215 public: 2216 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack, 2217 G1CMKeepAliveClosure* oopClosure) : 2218 _cm(cm), 2219 _markStack(markStack), 2220 _oopClosure(oopClosure) { } 2221 2222 void do_void() { 2223 _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false); 2224 } 2225 }; 2226 2227 // 'Keep Alive' closure used by parallel reference processing. 2228 // An instance of this closure is used in the parallel reference processing 2229 // code rather than an instance of G1CMKeepAliveClosure. We could have used 2230 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are 2231 // placed onto discovered ref lists only once, so we can mark and push with no 2232 // need to check whether the object has already been marked. Using the 2233 // G1CMKeepAliveClosure would mean, however, having all the worker threads 2234 // operating on the global mark stack. This means that an individual 2235 // worker would be doing lock-free pushes while it processes its own 2236 // discovered ref list followed by a drain call. If the discovered ref lists 2237 // are unbalanced then this could cause interference with the other 2238 // workers. Using a CMTask (and its embedded local data structures) 2239 // avoids that potential interference.
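// A rough, pseudo-code sketch of the per-worker pattern this sets up
// (the real control flow is split between ReferenceProcessor and the
// two closures that follow):
//
//   for each reference field p on this worker's discovered ref list:
//     keep_alive.do_oop(p);           // CMTask-local mark and push
//     every G1RefProcDrainInterval-th reference:
//       task->do_marking_step(...);   // partial drain: no stealing,
//                                     // no termination
//   drain.do_void();                  // final drain, with stealing
//                                     // and termination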
2240 class G1CMParKeepAliveAndDrainClosure: public OopClosure { 2241 ConcurrentMark* _cm; 2242 CMTask* _task; 2243 int _ref_counter_limit; 2244 int _ref_counter; 2245 public: 2246 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) : 2247 _cm(cm), _task(task), 2248 _ref_counter_limit(G1RefProcDrainInterval) { 2249 assert(_ref_counter_limit > 0, "sanity"); 2250 _ref_counter = _ref_counter_limit; 2251 } 2252 2253 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2254 virtual void do_oop( oop* p) { do_oop_work(p); } 2255 2256 template <class T> void do_oop_work(T* p) { 2257 if (!_cm->has_overflown()) { 2258 oop obj = oopDesc::load_decode_heap_oop(p); 2259 if (_cm->verbose_high()) { 2260 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2261 "*"PTR_FORMAT" = "PTR_FORMAT, 2262 _task->worker_id(), p, (void*) obj); 2263 } 2264 2265 _task->deal_with_reference(obj); 2266 _ref_counter--; 2267 2268 if (_ref_counter == 0) { 2269 // We have dealt with _ref_counter_limit references, pushing them and objects 2270 // reachable from them onto the local stack (and possibly the global stack). 2271 // Call do_marking_step() to process these entries. We call the routine in a 2272 // loop, which we'll exit if there's nothing more to do (i.e. we're done 2273 // with the entries that we've pushed as a result of the deal_with_reference 2274 // calls above) or we overflow. 2275 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 2276 // while there may still be some work to do. (See the comment at the 2277 // beginning of CMTask::do_marking_step() for those conditions - one of which 2278 // is reaching the specified time target.) It is only when 2279 // CMTask::do_marking_step() returns without setting the has_aborted() flag 2280 // that the marking has completed. 2281 do { 2282 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2283 _task->do_marking_step(mark_step_duration_ms, 2284 false /* do_stealing */, 2285 false /* do_termination */); 2286 } while (_task->has_aborted() && !_cm->has_overflown()); 2287 _ref_counter = _ref_counter_limit; 2288 } 2289 } else { 2290 if (_cm->verbose_high()) { 2291 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2292 } 2293 } 2294 } 2295 }; 2296 2297 class G1CMParDrainMarkingStackClosure: public VoidClosure { 2298 ConcurrentMark* _cm; 2299 CMTask* _task; 2300 public: 2301 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) : 2302 _cm(cm), _task(task) { } 2303 2304 void do_void() { 2305 do { 2306 if (_cm->verbose_high()) { 2307 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step", 2308 _task->worker_id()); 2309 } 2310 2311 // We call CMTask::do_marking_step() to completely drain the local and 2312 // global marking stacks. The routine is called in a loop, which we'll 2313 // exit if there's nothing more to do (i.e. we've completely drained the 2314 // entries that were pushed as a result of applying the 2315 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref 2316 // lists above) or we overflow the global marking stack. 2317 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 2318 // while there may still be some work to do. (See the comment at the 2319 // beginning of CMTask::do_marking_step() for those conditions - one of which 2320 // is reaching the specified time target.) It is only when 2321 // CMTask::do_marking_step() returns without setting the has_aborted() flag 2322 // that the marking has completed.
2323 2324 _task->do_marking_step(1000000000.0 /* something very large */, 2325 true /* do_stealing */, 2326 true /* do_termination */); 2327 } while (_task->has_aborted() && !_cm->has_overflown()); 2328 } 2329 }; 2330 2331 // Implementation of AbstractRefProcTaskExecutor for parallel 2332 // reference processing at the end of G1 concurrent marking 2333 2334 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2335 private: 2336 G1CollectedHeap* _g1h; 2337 ConcurrentMark* _cm; 2338 WorkGang* _workers; 2339 int _active_workers; 2340 2341 public: 2342 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2343 ConcurrentMark* cm, 2344 WorkGang* workers, 2345 int n_workers) : 2346 _g1h(g1h), _cm(cm), 2347 _workers(workers), _active_workers(n_workers) { } 2348 2349 // Executes the given task using concurrent marking worker threads. 2350 virtual void execute(ProcessTask& task); 2351 virtual void execute(EnqueueTask& task); 2352 }; 2353 2354 class G1CMRefProcTaskProxy: public AbstractGangTask { 2355 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2356 ProcessTask& _proc_task; 2357 G1CollectedHeap* _g1h; 2358 ConcurrentMark* _cm; 2359 2360 public: 2361 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2362 G1CollectedHeap* g1h, 2363 ConcurrentMark* cm) : 2364 AbstractGangTask("Process reference objects in parallel"), 2365 _proc_task(proc_task), _g1h(g1h), _cm(cm) { } 2366 2367 virtual void work(uint worker_id) { 2368 CMTask* marking_task = _cm->task(worker_id); 2369 G1CMIsAliveClosure g1_is_alive(_g1h); 2370 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task); 2371 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2372 2373 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2374 } 2375 }; 2376 2377 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2378 assert(_workers != NULL, "Need parallel worker threads."); 2379 2380 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2381 2382 // We need to reset the phase for each task execution so that 2383 // the termination protocol of CMTask::do_marking_step works. 2384 _cm->set_phase(_active_workers, false /* concurrent */); 2385 _g1h->set_par_threads(_active_workers); 2386 _workers->run_task(&proc_task_proxy); 2387 _g1h->set_par_threads(0); 2388 } 2389 2390 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2391 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2392 EnqueueTask& _enq_task; 2393 2394 public: 2395 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2396 AbstractGangTask("Enqueue reference objects in parallel"), 2397 _enq_task(enq_task) { } 2398 2399 virtual void work(uint worker_id) { 2400 _enq_task.work(worker_id); 2401 } 2402 }; 2403 2404 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2405 assert(_workers != NULL, "Need parallel worker threads."); 2406 2407 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2408 2409 _g1h->set_par_threads(_active_workers); 2410 _workers->run_task(&enq_task_proxy); 2411 _g1h->set_par_threads(0); 2412 } 2413 2414 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2415 ResourceMark rm; 2416 HandleMark hm; 2417 2418 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2419 2420 // Is alive closure. 2421 G1CMIsAliveClosure g1_is_alive(g1h); 2422 2423 // Inner scope to exclude the cleaning of the string and symbol 2424 // tables from the displayed time. 
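// In outline, the structure of the scope that follows is:
//
//   {
//     TraceTime t("GC ref-proc", ...);   // the timed part:
//     // ... process + enqueue discovered references ...
//   }
//   StringTable::unlink(&g1_is_alive);   // outside the "GC ref-proc" time
//   SymbolTable::unlink();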
2425 { 2426 if (G1Log::finer()) { 2427 gclog_or_tty->put(' '); 2428 } 2429 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty); 2430 2431 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2432 2433 // See the comment in G1CollectedHeap::ref_processing_init() 2434 // about how reference processing currently works in G1. 2435 2436 // Process weak references. 2437 rp->setup_policy(clear_all_soft_refs); 2438 assert(_markStack.isEmpty(), "mark stack should be empty"); 2439 2440 G1CMKeepAliveClosure g1_keep_alive(g1h, this); 2441 G1CMDrainMarkingStackClosure 2442 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive); 2443 2444 // We use the work gang from the G1CollectedHeap and we utilize all 2445 // the worker threads. 2446 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U; 2447 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2448 2449 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2450 g1h->workers(), active_workers); 2451 2452 if (rp->processing_is_mt()) { 2453 // Set the degree of MT here. If the discovery is done MT, there 2454 // may have been a different number of threads doing the discovery 2455 // and a different number of discovered lists may have Ref objects. 2456 // That is OK as long as the Reference lists are balanced (see 2457 // balance_all_queues() and balance_queues()). 2458 rp->set_active_mt_degree(active_workers); 2459 2460 rp->process_discovered_references(&g1_is_alive, 2461 &g1_keep_alive, 2462 &g1_drain_mark_stack, 2463 &par_task_executor); 2464 2465 // The work routines of the parallel keep_alive and drain_marking_stack 2466 // will set the has_overflown flag if we overflow the global marking 2467 // stack. 2468 } else { 2469 rp->process_discovered_references(&g1_is_alive, 2470 &g1_keep_alive, 2471 &g1_drain_mark_stack, 2472 NULL); 2473 } 2474 2475 assert(_markStack.overflow() || _markStack.isEmpty(), 2476 "mark stack should be empty (unless it overflowed)"); 2477 if (_markStack.overflow()) { 2478 // Should have been done already when we tried to push an 2479 // entry onto the global mark stack. But let's do it again. 2480 set_has_overflown(); 2481 } 2482 2483 if (rp->processing_is_mt()) { 2484 assert(rp->num_q() == active_workers, "why not"); 2485 rp->enqueue_discovered_references(&par_task_executor); 2486 } else { 2487 rp->enqueue_discovered_references(); 2488 } 2489 2490 rp->verify_no_references_recorded(); 2491 assert(!rp->discovery_enabled(), "Post condition"); 2492 } 2493 2494 // Now clean up stale oops in StringTable 2495 StringTable::unlink(&g1_is_alive); 2496 // Clean up unreferenced symbols in symbol table. 2497 SymbolTable::unlink(); 2498 } 2499 2500 void ConcurrentMark::swapMarkBitMaps() { 2501 CMBitMapRO* temp = _prevMarkBitMap; 2502 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2503 _nextMarkBitMap = (CMBitMap*) temp; 2504 } 2505 2506 class CMRemarkTask: public AbstractGangTask { 2507 private: 2508 ConcurrentMark *_cm; 2509 2510 public: 2511 void work(uint worker_id) { 2512 // Since all available tasks are actually started, we should 2513 // only proceed if we're supposed to be active. 2514 if (worker_id < _cm->active_tasks()) { 2515 CMTask* task = _cm->task(worker_id); 2516 task->record_start_time(); 2517 do { 2518 task->do_marking_step(1000000000.0 /* something very large */, 2519 true /* do_stealing */, 2520 true /* do_termination */); 2521 } while (task->has_aborted() && !_cm->has_overflown()); 2522 // If we overflow, then we do not want to restart.
We instead 2523 // want to abort remark and do concurrent marking again. 2524 task->record_end_time(); 2525 } 2526 } 2527 2528 CMRemarkTask(ConcurrentMark* cm, int active_workers) : 2529 AbstractGangTask("Par Remark"), _cm(cm) { 2530 _cm->terminator()->reset_for_reuse(active_workers); 2531 } 2532 }; 2533 2534 void ConcurrentMark::checkpointRootsFinalWork() { 2535 ResourceMark rm; 2536 HandleMark hm; 2537 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2538 2539 g1h->ensure_parsability(false); 2540 2541 if (G1CollectedHeap::use_parallel_gc_threads()) { 2542 G1CollectedHeap::StrongRootsScope srs(g1h); 2543 // this is remark, so we'll use up all active threads 2544 uint active_workers = g1h->workers()->active_workers(); 2545 if (active_workers == 0) { 2546 assert(active_workers > 0, "Should have been set earlier"); 2547 active_workers = (uint) ParallelGCThreads; 2548 g1h->workers()->set_active_workers(active_workers); 2549 } 2550 set_phase(active_workers, false /* concurrent */); 2551 // Leave _parallel_marking_threads at its 2552 // value originally calculated in the ConcurrentMark 2553 // constructor and pass values of the active workers 2554 // through the gang in the task. 2555 2556 CMRemarkTask remarkTask(this, active_workers); 2557 g1h->set_par_threads(active_workers); 2558 g1h->workers()->run_task(&remarkTask); 2559 g1h->set_par_threads(0); 2560 } else { 2561 G1CollectedHeap::StrongRootsScope srs(g1h); 2562 // this is remark, so we'll use up all available threads 2563 uint active_workers = 1; 2564 set_phase(active_workers, false /* concurrent */); 2565 2566 CMRemarkTask remarkTask(this, active_workers); 2567 // We will start all available threads, even if we decide that the 2568 // active_workers will be fewer. The extra ones will just bail out 2569 // immediately.
2570 remarkTask.work(0); 2571 } 2572 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2573 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2574 2575 print_stats(); 2576 2577 #if VERIFY_OBJS_PROCESSED 2578 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2579 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2580 _scan_obj_cl.objs_processed, 2581 ThreadLocalObjQueue::objs_enqueued); 2582 guarantee(_scan_obj_cl.objs_processed == 2583 ThreadLocalObjQueue::objs_enqueued, 2584 "Different number of objs processed and enqueued."); 2585 } 2586 #endif 2587 } 2588 2589 #ifndef PRODUCT 2590 2591 class PrintReachableOopClosure: public OopClosure { 2592 private: 2593 G1CollectedHeap* _g1h; 2594 outputStream* _out; 2595 VerifyOption _vo; 2596 bool _all; 2597 2598 public: 2599 PrintReachableOopClosure(outputStream* out, 2600 VerifyOption vo, 2601 bool all) : 2602 _g1h(G1CollectedHeap::heap()), 2603 _out(out), _vo(vo), _all(all) { } 2604 2605 void do_oop(narrowOop* p) { do_oop_work(p); } 2606 void do_oop( oop* p) { do_oop_work(p); } 2607 2608 template <class T> void do_oop_work(T* p) { 2609 oop obj = oopDesc::load_decode_heap_oop(p); 2610 const char* str = NULL; 2611 const char* str2 = ""; 2612 2613 if (obj == NULL) { 2614 str = ""; 2615 } else if (!_g1h->is_in_g1_reserved(obj)) { 2616 str = " O"; 2617 } else { 2618 HeapRegion* hr = _g1h->heap_region_containing(obj); 2619 guarantee(hr != NULL, "invariant"); 2620 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2621 bool marked = _g1h->is_marked(obj, _vo); 2622 2623 if (over_tams) { 2624 str = " >"; 2625 if (marked) { 2626 str2 = " AND MARKED"; 2627 } 2628 } else if (marked) { 2629 str = " M"; 2630 } else { 2631 str = " NOT"; 2632 } 2633 } 2634 2635 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2636 p, (void*) obj, str, str2); 2637 } 2638 }; 2639 2640 class PrintReachableObjectClosure : public ObjectClosure { 2641 private: 2642 G1CollectedHeap* _g1h; 2643 outputStream* _out; 2644 VerifyOption _vo; 2645 bool _all; 2646 HeapRegion* _hr; 2647 2648 public: 2649 PrintReachableObjectClosure(outputStream* out, 2650 VerifyOption vo, 2651 bool all, 2652 HeapRegion* hr) : 2653 _g1h(G1CollectedHeap::heap()), 2654 _out(out), _vo(vo), _all(all), _hr(hr) { } 2655 2656 void do_object(oop o) { 2657 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2658 bool marked = _g1h->is_marked(o, _vo); 2659 bool print_it = _all || over_tams || marked; 2660 2661 if (print_it) { 2662 _out->print_cr(" "PTR_FORMAT"%s", 2663 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2664 PrintReachableOopClosure oopCl(_out, _vo, _all); 2665 o->oop_iterate_no_header(&oopCl); 2666 } 2667 } 2668 }; 2669 2670 class PrintReachableRegionClosure : public HeapRegionClosure { 2671 private: 2672 G1CollectedHeap* _g1h; 2673 outputStream* _out; 2674 VerifyOption _vo; 2675 bool _all; 2676 2677 public: 2678 bool doHeapRegion(HeapRegion* hr) { 2679 HeapWord* b = hr->bottom(); 2680 HeapWord* e = hr->end(); 2681 HeapWord* t = hr->top(); 2682 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2683 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2684 "TAMS: "PTR_FORMAT, b, e, t, p); 2685 _out->cr(); 2686 2687 HeapWord* from = b; 2688 HeapWord* to = t; 2689 2690 if (to > from) { 2691 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2692 _out->cr(); 2693 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2694 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2695 _out->cr(); 2696 } 2697 2698 return false; 2699 } 2700 2701 PrintReachableRegionClosure(outputStream* out, 2702 VerifyOption vo, 2703 bool all) : 2704 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2705 }; 2706 2707 void ConcurrentMark::print_reachable(const char* str, 2708 VerifyOption vo, 2709 bool all) { 2710 gclog_or_tty->cr(); 2711 gclog_or_tty->print_cr("== Doing heap dump... "); 2712 2713 if (G1PrintReachableBaseFile == NULL) { 2714 gclog_or_tty->print_cr(" #### error: no base file defined"); 2715 return; 2716 } 2717 2718 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2719 (JVM_MAXPATHLEN - 1)) { 2720 gclog_or_tty->print_cr(" #### error: file name too long"); 2721 return; 2722 } 2723 2724 char file_name[JVM_MAXPATHLEN]; 2725 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2726 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2727 2728 fileStream fout(file_name); 2729 if (!fout.is_open()) { 2730 gclog_or_tty->print_cr(" #### error: could not open file"); 2731 return; 2732 } 2733 2734 outputStream* out = &fout; 2735 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2736 out->cr(); 2737 2738 out->print_cr("--- ITERATING OVER REGIONS"); 2739 out->cr(); 2740 PrintReachableRegionClosure rcl(out, vo, all); 2741 _g1h->heap_region_iterate(&rcl); 2742 out->cr(); 2743 2744 gclog_or_tty->print_cr(" done"); 2745 gclog_or_tty->flush(); 2746 } 2747 2748 #endif // PRODUCT 2749 2750 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2751 // Note we are overriding the read-only view of the prev map here, via 2752 // the cast. 2753 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2754 } 2755 2756 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2757 _nextMarkBitMap->clearRange(mr); 2758 } 2759 2760 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2761 clearRangePrevBitmap(mr); 2762 clearRangeNextBitmap(mr); 2763 } 2764 2765 HeapRegion* 2766 ConcurrentMark::claim_region(uint worker_id) { 2767 // "checkpoint" the finger 2768 HeapWord* finger = _finger; 2769 2770 // _heap_end will not change underneath our feet; it only changes at 2771 // yield points. 2772 while (finger < _heap_end) { 2773 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2774 2775 // Note on how this code handles humongous regions. In the 2776 // normal case the finger will reach the start of a "starts 2777 // humongous" (SH) region. 
Its end will either be the end of the 2778 // last "continues humongous" (CH) region in the sequence, or the 2779 // standard end of the SH region (if the SH is the only region in 2780 // the sequence). That way claim_region() will skip over the CH 2781 // regions. However, there is a subtle race between a CM thread 2782 // executing this method and a mutator thread doing a humongous 2783 // object allocation. The two are not mutually exclusive as the CM 2784 // thread does not need to hold the Heap_lock when it gets 2785 // here. So there is a chance that claim_region() will come across 2786 // a free region that's in the process of becoming a SH or a CH 2787 // region. In the former case, it will either 2788 // a) Miss the update to the region's end, in which case it will 2789 // visit every subsequent CH region, will find their bitmaps 2790 // empty, and do nothing, or 2791 // b) Will observe the update of the region's end (in which case 2792 // it will skip the subsequent CH regions). 2793 // If it comes across a region that suddenly becomes CH, the 2794 // scenario will be similar to b). So, the race between 2795 // claim_region() and a humongous object allocation might force us 2796 // to do a bit of unnecessary work (due to some unnecessary bitmap 2797 // iterations) but it should not introduce any correctness issues. 2798 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2799 HeapWord* bottom = curr_region->bottom(); 2800 HeapWord* end = curr_region->end(); 2801 HeapWord* limit = curr_region->next_top_at_mark_start(); 2802 2803 if (verbose_low()) { 2804 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" " 2805 "["PTR_FORMAT", "PTR_FORMAT"), " 2806 "limit = "PTR_FORMAT, 2807 worker_id, curr_region, bottom, end, limit); 2808 } 2809 2810 // Is the gap between reading the finger and doing the CAS too long? 2811 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2812 if (res == finger) { 2813 // we succeeded 2814 2815 // notice that _finger == end cannot be guaranteed here since 2816 // someone else might have moved the finger even further 2817 assert(_finger >= end, "the finger should have moved forward"); 2818 2819 if (verbose_low()) { 2820 gclog_or_tty->print_cr("[%u] we were successful with region = " 2821 PTR_FORMAT, worker_id, curr_region); 2822 } 2823 2824 if (limit > bottom) { 2825 if (verbose_low()) { 2826 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, " 2827 "returning it ", worker_id, curr_region); 2828 } 2829 return curr_region; 2830 } else { 2831 assert(limit == bottom, 2832 "the region limit should be at bottom"); 2833 if (verbose_low()) { 2834 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, " 2835 "returning NULL", worker_id, curr_region); 2836 } 2837 // we return NULL and the caller should try calling 2838 // claim_region() again.
2839 return NULL; 2840 } 2841 } else { 2842 assert(_finger > finger, "the finger should have moved forward"); 2843 if (verbose_low()) { 2844 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2845 "global finger = "PTR_FORMAT", " 2846 "our finger = "PTR_FORMAT, 2847 worker_id, _finger, finger); 2848 } 2849 2850 // read it again 2851 finger = _finger; 2852 } 2853 } 2854 2855 return NULL; 2856 } 2857 2858 #ifndef PRODUCT 2859 enum VerifyNoCSetOopsPhase { 2860 VerifyNoCSetOopsStack, 2861 VerifyNoCSetOopsQueues, 2862 VerifyNoCSetOopsSATBCompleted, 2863 VerifyNoCSetOopsSATBThread 2864 }; 2865 2866 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2867 private: 2868 G1CollectedHeap* _g1h; 2869 VerifyNoCSetOopsPhase _phase; 2870 int _info; 2871 2872 const char* phase_str() { 2873 switch (_phase) { 2874 case VerifyNoCSetOopsStack: return "Stack"; 2875 case VerifyNoCSetOopsQueues: return "Queue"; 2876 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2877 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2878 default: ShouldNotReachHere(); 2879 } 2880 return NULL; 2881 } 2882 2883 void do_object_work(oop obj) { 2884 guarantee(!_g1h->obj_in_cs(obj), 2885 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2886 (void*) obj, phase_str(), _info)); 2887 } 2888 2889 public: 2890 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2891 2892 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2893 _phase = phase; 2894 _info = info; 2895 } 2896 2897 virtual void do_oop(oop* p) { 2898 oop obj = oopDesc::load_decode_heap_oop(p); 2899 do_object_work(obj); 2900 } 2901 2902 virtual void do_oop(narrowOop* p) { 2903 // We should not come across narrow oops while scanning marking 2904 // stacks and SATB buffers. 2905 ShouldNotReachHere(); 2906 } 2907 2908 virtual void do_object(oop obj) { 2909 do_object_work(obj); 2910 } 2911 }; 2912 2913 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2914 bool verify_enqueued_buffers, 2915 bool verify_thread_buffers, 2916 bool verify_fingers) { 2917 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2918 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2919 return; 2920 } 2921 2922 VerifyNoCSetOopsClosure cl; 2923 2924 if (verify_stacks) { 2925 // Verify entries on the global mark stack 2926 cl.set_phase(VerifyNoCSetOopsStack); 2927 _markStack.oops_do(&cl); 2928 2929 // Verify entries on the task queues 2930 for (uint i = 0; i < _max_worker_id; i += 1) { 2931 cl.set_phase(VerifyNoCSetOopsQueues, i); 2932 CMTaskQueue* queue = _task_queues->queue(i); 2933 queue->oops_do(&cl); 2934 } 2935 } 2936 2937 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2938 2939 // Verify entries on the enqueued SATB buffers 2940 if (verify_enqueued_buffers) { 2941 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2942 satb_qs.iterate_completed_buffers_read_only(&cl); 2943 } 2944 2945 // Verify entries on the per-thread SATB buffers 2946 if (verify_thread_buffers) { 2947 cl.set_phase(VerifyNoCSetOopsSATBThread); 2948 satb_qs.iterate_thread_buffers_read_only(&cl); 2949 } 2950 2951 if (verify_fingers) { 2952 // Verify the global finger 2953 HeapWord* global_finger = finger(); 2954 if (global_finger != NULL && global_finger < _heap_end) { 2955 // The global finger always points to a heap region boundary. 
We 2956 // use heap_region_containing_raw() to get the containing region 2957 // given that the global finger could be pointing to a free region 2958 // which subsequently becomes continues humongous. If that 2959 // happens, heap_region_containing() will return the bottom of the 2960 // corresponding starts humongous region and the check below will 2961 // not hold any more. 2962 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2963 guarantee(global_finger == global_hr->bottom(), 2964 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2965 global_finger, HR_FORMAT_PARAMS(global_hr))); 2966 } 2967 2968 // Verify the task fingers 2969 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2970 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2971 CMTask* task = _tasks[i]; 2972 HeapWord* task_finger = task->finger(); 2973 if (task_finger != NULL && task_finger < _heap_end) { 2974 // See above note on the global finger verification. 2975 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2976 guarantee(task_finger == task_hr->bottom() || 2977 !task_hr->in_collection_set(), 2978 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2979 task_finger, HR_FORMAT_PARAMS(task_hr))); 2980 } 2981 } 2982 } 2983 } 2984 #endif // PRODUCT 2985 2986 // Aggregate the counting data that was constructed concurrently 2987 // with marking. 2988 class AggregateCountDataHRClosure: public HeapRegionClosure { 2989 G1CollectedHeap* _g1h; 2990 ConcurrentMark* _cm; 2991 CardTableModRefBS* _ct_bs; 2992 BitMap* _cm_card_bm; 2993 uint _max_worker_id; 2994 2995 public: 2996 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2997 BitMap* cm_card_bm, 2998 uint max_worker_id) : 2999 _g1h(g1h), _cm(g1h->concurrent_mark()), 3000 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3001 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3002 3003 bool doHeapRegion(HeapRegion* hr) { 3004 if (hr->continuesHumongous()) { 3005 // We will ignore these here and process them when their 3006 // associated "starts humongous" region is processed. 3007 // Note that we cannot rely on their associated 3008 // "starts humongous" region to have their bit set to 1 3009 // since, due to the region chunking in the parallel region 3010 // iteration, a "continues humongous" region might be visited 3011 // before its associated "starts humongous". 3012 return false; 3013 } 3014 3015 HeapWord* start = hr->bottom(); 3016 HeapWord* limit = hr->next_top_at_mark_start(); 3017 HeapWord* end = hr->end(); 3018 3019 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3020 err_msg("Preconditions not met - " 3021 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3022 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3023 start, limit, hr->top(), hr->end())); 3024 3025 assert(hr->next_marked_bytes() == 0, "Precondition"); 3026 3027 if (start == limit) { 3028 // NTAMS of this region has not been set so nothing to do. 3029 return false; 3030 } 3031 3032 // 'start' should be in the heap. 
3033 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 3034 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 3035 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 3036 3037 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 3038 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 3039 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 3040 3041 // If ntams is not card aligned then we bump the card bitmap index 3042 // for limit so that we get all the cards spanned by 3043 // the object ending at ntams. 3044 // Note: if this is the last region in the heap then ntams 3045 // could actually be just beyond the end of the heap; 3046 // limit_idx will then correspond to a (non-existent) card 3047 // that is also outside the heap. 3048 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 3049 limit_idx += 1; 3050 } 3051 3052 assert(limit_idx <= end_idx, "or else use atomics"); 3053 3054 // Aggregate the "stripe" in the count data associated with hr. 3055 uint hrs_index = hr->hrs_index(); 3056 size_t marked_bytes = 0; 3057 3058 for (uint i = 0; i < _max_worker_id; i += 1) { 3059 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 3060 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 3061 3062 // Fetch the marked_bytes in this region for task i and 3063 // add it to the running total for this region. 3064 marked_bytes += marked_bytes_array[hrs_index]; 3065 3066 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 3067 // into the global card bitmap. 3068 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3069 3070 while (scan_idx < limit_idx) { 3071 assert(task_card_bm->at(scan_idx) == true, "should be"); 3072 _cm_card_bm->set_bit(scan_idx); 3073 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3074 3075 // BitMap::get_next_one_offset() can handle the case when 3076 // its left_offset parameter is greater than its right_offset 3077 // parameter. It does, however, have an early exit if 3078 // left_offset == right_offset. So let's limit the value 3079 // passed in for left offset here. 3080 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 3081 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 3082 } 3083 } 3084 3085 // Update the marked bytes for this region.
3086 hr->add_to_marked_bytes(marked_bytes); 3087 3088 // Next heap region 3089 return false; 3090 } 3091 }; 3092 3093 class G1AggregateCountDataTask: public AbstractGangTask { 3094 protected: 3095 G1CollectedHeap* _g1h; 3096 ConcurrentMark* _cm; 3097 BitMap* _cm_card_bm; 3098 uint _max_worker_id; 3099 int _active_workers; 3100 3101 public: 3102 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3103 ConcurrentMark* cm, 3104 BitMap* cm_card_bm, 3105 uint max_worker_id, 3106 int n_workers) : 3107 AbstractGangTask("Count Aggregation"), 3108 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3109 _max_worker_id(max_worker_id), 3110 _active_workers(n_workers) { } 3111 3112 void work(uint worker_id) { 3113 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3114 3115 if (G1CollectedHeap::use_parallel_gc_threads()) { 3116 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3117 _active_workers, 3118 HeapRegion::AggregateCountClaimValue); 3119 } else { 3120 _g1h->heap_region_iterate(&cl); 3121 } 3122 } 3123 }; 3124 3125 3126 void ConcurrentMark::aggregate_count_data() { 3127 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3128 _g1h->workers()->active_workers() : 3129 1); 3130 3131 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3132 _max_worker_id, n_workers); 3133 3134 if (G1CollectedHeap::use_parallel_gc_threads()) { 3135 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3136 "sanity check"); 3137 _g1h->set_par_threads(n_workers); 3138 _g1h->workers()->run_task(&g1_par_agg_task); 3139 _g1h->set_par_threads(0); 3140 3141 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3142 "sanity check"); 3143 _g1h->reset_heap_region_claim_values(); 3144 } else { 3145 g1_par_agg_task.work(0); 3146 } 3147 } 3148 3149 // Clear the per-worker arrays used to store the per-region counting data 3150 void ConcurrentMark::clear_all_count_data() { 3151 // Clear the global card bitmap - it will be filled during 3152 // liveness count aggregation (during remark) and the 3153 // final counting task. 3154 _card_bm.clear(); 3155 3156 // Clear the global region bitmap - it will be filled as part 3157 // of the final counting task. 3158 _region_bm.clear(); 3159 3160 uint max_regions = _g1h->max_regions(); 3161 assert(_max_worker_id > 0, "uninitialized"); 3162 3163 for (uint i = 0; i < _max_worker_id; i += 1) { 3164 BitMap* task_card_bm = count_card_bitmap_for(i); 3165 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3166 3167 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3168 assert(marked_bytes_array != NULL, "uninitialized"); 3169 3170 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3171 task_card_bm->clear(); 3172 } 3173 } 3174 3175 void ConcurrentMark::print_stats() { 3176 if (verbose_stats()) { 3177 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3178 for (size_t i = 0; i < _active_tasks; ++i) { 3179 _tasks[i]->print_stats(); 3180 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3181 } 3182 } 3183 } 3184 3185 // abandon current marking iteration due to a Full GC 3186 void ConcurrentMark::abort() { 3187 // Clear all marks to force marking thread to do nothing 3188 _nextMarkBitMap->clearAll(); 3189 3190 // Note we cannot clear the previous marking bitmap here 3191 // since VerifyDuringGC verifies the objects marked during 3192 // a full GC against the previous bitmap. 
3193 3194 // Clear the liveness counting data 3195 clear_all_count_data(); 3196 // Empty mark stack 3197 reset_marking_state(); 3198 for (uint i = 0; i < _max_worker_id; ++i) { 3199 _tasks[i]->clear_region_fields(); 3200 } 3201 _has_aborted = true; 3202 3203 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3204 satb_mq_set.abandon_partial_marking(); 3205 // This can be called either during or outside marking; we'll read 3206 // the expected_active value from the SATB queue set. 3207 satb_mq_set.set_active_all_threads( 3208 false, /* new active value */ 3209 satb_mq_set.is_active() /* expected_active */); 3210 } 3211 3212 static void print_ms_time_info(const char* prefix, const char* name, 3213 NumberSeq& ns) { 3214 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3215 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3216 if (ns.num() > 0) { 3217 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3218 prefix, ns.sd(), ns.maximum()); 3219 } 3220 } 3221 3222 void ConcurrentMark::print_summary_info() { 3223 gclog_or_tty->print_cr(" Concurrent marking:"); 3224 print_ms_time_info(" ", "init marks", _init_times); 3225 print_ms_time_info(" ", "remarks", _remark_times); 3226 { 3227 print_ms_time_info(" ", "final marks", _remark_mark_times); 3228 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3229 3230 } 3231 print_ms_time_info(" ", "cleanups", _cleanup_times); 3232 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3233 _total_counting_time, 3234 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3235 (double)_cleanup_times.num() 3236 : 0.0)); 3237 if (G1ScrubRemSets) { 3238 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3239 _total_rs_scrub_time, 3240 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3241 (double)_cleanup_times.num() 3242 : 0.0)); 3243 } 3244 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3245 (_init_times.sum() + _remark_times.sum() + 3246 _cleanup_times.sum())/1000.0); 3247 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3248 "(%8.2f s marking).", 3249 cmThread()->vtime_accum(), 3250 cmThread()->vtime_mark_accum()); 3251 } 3252 3253 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3254 _parallel_workers->print_worker_threads_on(st); 3255 } 3256 3257 // We take a break if someone is trying to stop the world.
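// A minimal usage sketch (hypothetical caller; the real call sites
// are in the concurrent marking loops):
//
//   while (!done) {
//     do_some_concurrent_work();
//     if (cm->do_yield_check(worker_id)) {
//       // we yielded for a safepoint; re-read anything that may
//       // have changed across the yield before continuing
//     }
//   }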
3258 bool ConcurrentMark::do_yield_check(uint worker_id) { 3259 if (should_yield()) { 3260 if (worker_id == 0) { 3261 _g1h->g1_policy()->record_concurrent_pause(); 3262 } 3263 cmThread()->yield(); 3264 return true; 3265 } else { 3266 return false; 3267 } 3268 } 3269 3270 bool ConcurrentMark::should_yield() { 3271 return cmThread()->should_yield(); 3272 } 3273 3274 bool ConcurrentMark::containing_card_is_marked(void* p) { 3275 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3276 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3277 } 3278 3279 bool ConcurrentMark::containing_cards_are_marked(void* start, 3280 void* last) { 3281 return containing_card_is_marked(start) && 3282 containing_card_is_marked(last); 3283 } 3284 3285 #ifndef PRODUCT 3286 // for debugging purposes 3287 void ConcurrentMark::print_finger() { 3288 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3289 _heap_start, _heap_end, _finger); 3290 for (uint i = 0; i < _max_worker_id; ++i) { 3291 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger()); 3292 } 3293 gclog_or_tty->print_cr(""); 3294 } 3295 #endif 3296 3297 void CMTask::scan_object(oop obj) { 3298 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3299 3300 if (_cm->verbose_high()) { 3301 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3302 _worker_id, (void*) obj); 3303 } 3304 3305 size_t obj_size = obj->size(); 3306 _words_scanned += obj_size; 3307 3308 obj->oop_iterate(_cm_oop_closure); 3309 statsOnly( ++_objs_scanned ); 3310 check_limits(); 3311 } 3312 3313 // Closure for iteration over bitmaps 3314 class CMBitMapClosure : public BitMapClosure { 3315 private: 3316 // the bitmap that is being iterated over 3317 CMBitMap* _nextMarkBitMap; 3318 ConcurrentMark* _cm; 3319 CMTask* _task; 3320 3321 public: 3322 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3323 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3324 3325 bool do_bit(size_t offset) { 3326 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3327 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3328 assert( addr < _cm->finger(), "invariant"); 3329 3330 statsOnly( _task->increase_objs_found_on_bitmap() ); 3331 assert(addr >= _task->finger(), "invariant"); 3332 3333 // We move that task's local finger along. 3334 _task->move_finger_to(addr); 3335 3336 _task->scan_object(oop(addr)); 3337 // we only partially drain the local queue and global stack 3338 _task->drain_local_queue(true); 3339 _task->drain_global_stack(true); 3340 3341 // if the has_aborted flag has been raised, we need to bail out of 3342 // the iteration 3343 return !_task->has_aborted(); 3344 } 3345 }; 3346 3347 // Closure for iterating over objects, currently only used for 3348 // processing SATB buffers. 
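// Each object taken from a completed buffer is passed to
// CMTask::deal_with_reference(), which marks it and, if necessary,
// pushes it on the task's local queue.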
3349 class CMObjectClosure : public ObjectClosure { 3350 private: 3351 CMTask* _task; 3352 3353 public: 3354 void do_object(oop obj) { 3355 _task->deal_with_reference(obj); 3356 } 3357 3358 CMObjectClosure(CMTask* task) : _task(task) { } 3359 }; 3360 3361 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3362 ConcurrentMark* cm, 3363 CMTask* task) 3364 : _g1h(g1h), _cm(cm), _task(task) { 3365 assert(_ref_processor == NULL, "should be initialized to NULL"); 3366 3367 if (G1UseConcMarkReferenceProcessing) { 3368 _ref_processor = g1h->ref_processor_cm(); 3369 assert(_ref_processor != NULL, "should not be NULL"); 3370 } 3371 } 3372 3373 void CMTask::setup_for_region(HeapRegion* hr) { 3374 // Separated the asserts so that we know which one fires. 3375 assert(hr != NULL, 3376 "claim_region() should have filtered out continues humongous regions"); 3377 assert(!hr->continuesHumongous(), 3378 "claim_region() should have filtered out continues humongous regions"); 3379 3380 if (_cm->verbose_low()) { 3381 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT, 3382 _worker_id, hr); 3383 } 3384 3385 _curr_region = hr; 3386 _finger = hr->bottom(); 3387 update_region_limit(); 3388 } 3389 3390 void CMTask::update_region_limit() { 3391 HeapRegion* hr = _curr_region; 3392 HeapWord* bottom = hr->bottom(); 3393 HeapWord* limit = hr->next_top_at_mark_start(); 3394 3395 if (limit == bottom) { 3396 if (_cm->verbose_low()) { 3397 gclog_or_tty->print_cr("[%u] found an empty region " 3398 "["PTR_FORMAT", "PTR_FORMAT")", 3399 _worker_id, bottom, limit); 3400 } 3401 // The region was collected underneath our feet. 3402 // We set the finger to bottom to ensure that the bitmap 3403 // iteration that will follow this will not do anything. 3404 // (this is not a condition that holds when we set the region up, 3405 // as the region is not supposed to be empty in the first place) 3406 _finger = bottom; 3407 } else if (limit >= _region_limit) { 3408 assert(limit >= _finger, "peace of mind"); 3409 } else { 3410 assert(limit < _region_limit, "only way to get here"); 3411 // This can happen under some pretty unusual circumstances. An 3412 // evacuation pause empties the region underneath our feet (NTAMS 3413 // at bottom). We then do some allocation in the region (NTAMS 3414 // stays at bottom), followed by the region being used as a GC 3415 // alloc region (NTAMS will move to top() and the objects 3416 // originally below it will be grayed). All objects now marked in 3417 // the region are explicitly grayed, if below the global finger, 3418 // and we do not need in fact to scan anything else. So, we simply 3419 // set _finger to be limit to ensure that the bitmap iteration 3420 // doesn't do anything. 3421 _finger = limit; 3422 } 3423 3424 _region_limit = limit; 3425 } 3426 3427 void CMTask::giveup_current_region() { 3428 assert(_curr_region != NULL, "invariant"); 3429 if (_cm->verbose_low()) { 3430 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3431 _worker_id, _curr_region); 3432 } 3433 clear_region_fields(); 3434 } 3435 3436 void CMTask::clear_region_fields() { 3437 // Values for these three fields that indicate that we're not 3438 // holding on to a region. 
3439 _curr_region = NULL; 3440 _finger = NULL; 3441 _region_limit = NULL; 3442 } 3443 3444 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3445 if (cm_oop_closure == NULL) { 3446 assert(_cm_oop_closure != NULL, "invariant"); 3447 } else { 3448 assert(_cm_oop_closure == NULL, "invariant"); 3449 } 3450 _cm_oop_closure = cm_oop_closure; 3451 } 3452 3453 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3454 guarantee(nextMarkBitMap != NULL, "invariant"); 3455 3456 if (_cm->verbose_low()) { 3457 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3458 } 3459 3460 _nextMarkBitMap = nextMarkBitMap; 3461 clear_region_fields(); 3462 3463 _calls = 0; 3464 _elapsed_time_ms = 0.0; 3465 _termination_time_ms = 0.0; 3466 _termination_start_time_ms = 0.0; 3467 3468 #if _MARKING_STATS_ 3469 _local_pushes = 0; 3470 _local_pops = 0; 3471 _local_max_size = 0; 3472 _objs_scanned = 0; 3473 _global_pushes = 0; 3474 _global_pops = 0; 3475 _global_max_size = 0; 3476 _global_transfers_to = 0; 3477 _global_transfers_from = 0; 3478 _regions_claimed = 0; 3479 _objs_found_on_bitmap = 0; 3480 _satb_buffers_processed = 0; 3481 _steal_attempts = 0; 3482 _steals = 0; 3483 _aborted = 0; 3484 _aborted_overflow = 0; 3485 _aborted_cm_aborted = 0; 3486 _aborted_yield = 0; 3487 _aborted_timed_out = 0; 3488 _aborted_satb = 0; 3489 _aborted_termination = 0; 3490 #endif // _MARKING_STATS_ 3491 } 3492 3493 bool CMTask::should_exit_termination() { 3494 regular_clock_call(); 3495 // This is called when we are in the termination protocol. We should 3496 // quit if, for some reason, this task wants to abort or the global 3497 // stack is not empty (this means that we can get work from it). 3498 return !_cm->mark_stack_empty() || has_aborted(); 3499 } 3500 3501 void CMTask::reached_limit() { 3502 assert(_words_scanned >= _words_scanned_limit || 3503 _refs_reached >= _refs_reached_limit , 3504 "shouldn't have been called otherwise"); 3505 regular_clock_call(); 3506 } 3507 3508 void CMTask::regular_clock_call() { 3509 if (has_aborted()) return; 3510 3511 // First, we need to recalculate the words scanned and refs reached 3512 // limits for the next clock call. 3513 recalculate_limits(); 3514 3515 // During the regular clock call we do the following 3516 3517 // (1) If an overflow has been flagged, then we abort. 3518 if (_cm->has_overflown()) { 3519 set_has_aborted(); 3520 return; 3521 } 3522 3523 // If we are not concurrent (i.e. we're doing remark) we don't need 3524 // to check anything else. The other steps are only needed during 3525 // the concurrent marking phase. 3526 if (!concurrent()) return; 3527 3528 // (2) If marking has been aborted for Full GC, then we also abort. 3529 if (_cm->has_aborted()) { 3530 set_has_aborted(); 3531 statsOnly( ++_aborted_cm_aborted ); 3532 return; 3533 } 3534 3535 double curr_time_ms = os::elapsedVTime() * 1000.0; 3536 3537 // (3) If marking stats are enabled, then we update the step history. 
3538 #if _MARKING_STATS_
3539   if (_words_scanned >= _words_scanned_limit) {
3540     ++_clock_due_to_scanning;
3541   }
3542   if (_refs_reached >= _refs_reached_limit) {
3543     ++_clock_due_to_marking;
3544   }
3545
3546   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3547   _interval_start_time_ms = curr_time_ms;
3548   _all_clock_intervals_ms.add(last_interval_ms);
3549
3550   if (_cm->verbose_medium()) {
3551     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3552                            "scanned = %d%s, refs reached = %d%s",
3553                            _worker_id, last_interval_ms,
3554                            _words_scanned,
3555                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3556                            _refs_reached,
3557                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3558   }
3559 #endif // _MARKING_STATS_
3560
3561   // (4) We check whether we should yield. If we have to, then we abort.
3562   if (_cm->should_yield()) {
3563     // We should yield. To do this we abort the task. The caller is
3564     // responsible for yielding.
3565     set_has_aborted();
3566     statsOnly( ++_aborted_yield );
3567     return;
3568   }
3569
3570   // (5) We check whether we've reached our time quota. If we have,
3571   // then we abort.
3572   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3573   if (elapsed_time_ms > _time_target_ms) {
3574     set_has_aborted();
3575     _has_timed_out = true;
3576     statsOnly( ++_aborted_timed_out );
3577     return;
3578   }
3579
3580   // (6) Finally, we check whether there are enough completed SATB
3581   // buffers available for processing. If there are, we abort.
3582   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3583   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3584     if (_cm->verbose_low()) {
3585       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3586                              _worker_id);
3587     }
3588     // we do need to process SATB buffers; we'll abort and restart
3589     // the marking task to do so
3590     set_has_aborted();
3591     statsOnly( ++_aborted_satb );
3592     return;
3593   }
3594 }
3595
3596 void CMTask::recalculate_limits() {
3597   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3598   _words_scanned_limit = _real_words_scanned_limit;
3599
3600   _real_refs_reached_limit = _refs_reached + refs_reached_period;
3601   _refs_reached_limit = _real_refs_reached_limit;
3602 }
3603
3604 void CMTask::decrease_limits() {
3605   // This is called when we believe that we're going to do an infrequent
3606   // operation which will increase the per-byte scanned cost (i.e. move
3607   // entries to/from the global stack). It basically tries to decrease the
3608   // scanning limit so that the clock is called earlier.
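  // For example, if words_scanned_period were (hypothetically) 4096
  // words, the adjusted limit below would leave roughly a quarter
  // period (1024 words) until the next clock call, instead of a
  // full one.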
3609
3610   if (_cm->verbose_medium()) {
3611     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3612   }
3613
3614   _words_scanned_limit = _real_words_scanned_limit -
3615     3 * words_scanned_period / 4;
3616   _refs_reached_limit = _real_refs_reached_limit -
3617     3 * refs_reached_period / 4;
3618 }
3619
3620 void CMTask::move_entries_to_global_stack() {
3621   // local array where we'll store the entries that will be popped
3622   // from the local queue
3623   oop buffer[global_stack_transfer_size];
3624
3625   int n = 0;
3626   oop obj;
3627   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3628     buffer[n] = obj;
3629     ++n;
3630   }
3631
3632   if (n > 0) {
3633     // we popped at least one entry from the local queue
3634
3635     statsOnly( ++_global_transfers_to; _local_pops += n );
3636
3637     if (!_cm->mark_stack_push(buffer, n)) {
3638       if (_cm->verbose_low()) {
3639         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3640                                _worker_id);
3641       }
3642       set_has_aborted();
3643     } else {
3644       // the transfer was successful
3645
3646       if (_cm->verbose_medium()) {
3647         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3648                                _worker_id, n);
3649       }
3650       statsOnly( int tmp_size = _cm->mark_stack_size();
3651                  if (tmp_size > _global_max_size) {
3652                    _global_max_size = tmp_size;
3653                  }
3654                  _global_pushes += n );
3655     }
3656   }
3657
3658   // this operation was quite expensive, so decrease the limits
3659   decrease_limits();
3660 }
3661
3662 void CMTask::get_entries_from_global_stack() {
3663   // local array where we'll store the entries that will be popped
3664   // from the global stack.
3665   oop buffer[global_stack_transfer_size];
3666   int n;
3667   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3668   assert(n <= global_stack_transfer_size,
3669          "we should not pop more than the given limit");
3670   if (n > 0) {
3671     // yes, we did actually pop at least one entry
3672
3673     statsOnly( ++_global_transfers_from; _global_pops += n );
3674     if (_cm->verbose_medium()) {
3675       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3676                              _worker_id, n);
3677     }
3678     for (int i = 0; i < n; ++i) {
3679       bool success = _task_queue->push(buffer[i]);
3680       // We only call this when the local queue is empty or under a
3681       // given target limit. So, we do not expect this push to fail.
3682       assert(success, "invariant");
3683     }
3684
3685     statsOnly( int tmp_size = _task_queue->size();
3686                if (tmp_size > _local_max_size) {
3687                  _local_max_size = tmp_size;
3688                }
3689                _local_pushes += n );
3690   }
3691
3692   // this operation was quite expensive, so decrease the limits
3693   decrease_limits();
3694 }
3695
3696 void CMTask::drain_local_queue(bool partially) {
3697   if (has_aborted()) return;
3698
3699   // Decide what the target size is, depending on whether we're going to
3700   // drain it partially (so that other tasks can steal if they run out
3701   // of things to do) or totally (at the very end).
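  // For a partial drain, the target below is capped at a third of
  // the queue's capacity or at GCDrainStackTargetSize, whichever
  // is smaller.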
3702   size_t target_size;
3703   if (partially) {
3704     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3705   } else {
3706     target_size = 0;
3707   }
3708
3709   if (_task_queue->size() > target_size) {
3710     if (_cm->verbose_high()) {
3711       gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3712                              _worker_id, target_size);
3713     }
3714
3715     oop obj;
3716     bool ret = _task_queue->pop_local(obj);
3717     while (ret) {
3718       statsOnly( ++_local_pops );
3719
3720       if (_cm->verbose_high()) {
3721         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3722                                (void*) obj);
3723       }
3724
3725       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3726       assert(!_g1h->is_on_master_free_list(
3727                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3728
3729       scan_object(obj);
3730
3731       if (_task_queue->size() <= target_size || has_aborted()) {
3732         ret = false;
3733       } else {
3734         ret = _task_queue->pop_local(obj);
3735       }
3736     }
3737
3738     if (_cm->verbose_high()) {
3739       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3740                              _worker_id, _task_queue->size());
3741     }
3742   }
3743 }
3744
3745 void CMTask::drain_global_stack(bool partially) {
3746   if (has_aborted()) return;
3747
3748   // We have a policy to drain the local queue before we attempt to
3749   // drain the global stack.
3750   assert(partially || _task_queue->size() == 0, "invariant");
3751
3752   // Decide what the target size is, depending on whether we're going to
3753   // drain it partially (so that other tasks can steal if they run out
3754   // of things to do) or totally (at the very end). Notice that,
3755   // because we move entries from the global stack in chunks or
3756   // because another task might be doing the same, we might in fact
3757   // drop below the target. But, this is not a problem.
3758   size_t target_size;
3759   if (partially) {
3760     target_size = _cm->partial_mark_stack_size_target();
3761   } else {
3762     target_size = 0;
3763   }
3764
3765   if (_cm->mark_stack_size() > target_size) {
3766     if (_cm->verbose_low()) {
3767       gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3768                              _worker_id, target_size);
3769     }
3770
3771     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3772       get_entries_from_global_stack();
3773       drain_local_queue(partially);
3774     }
3775
3776     if (_cm->verbose_low()) {
3777       gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3778                              _worker_id, _cm->mark_stack_size());
3779     }
3780   }
3781 }
3782
3783 // SATB Queue has several assumptions on whether to call the par or
3784 // non-par versions of the methods. This is why some of the code is
3785 // replicated. We should really get rid of the single-threaded version
3786 // of the code to simplify things.
3787 void CMTask::drain_satb_buffers() {
3788   if (has_aborted()) return;
3789
3790   // We set this so that the regular clock knows that we're in the
3791   // middle of draining buffers and doesn't set the abort flag when it
3792   // notices that SATB buffers are available for draining. It'd be
3793   // very counterproductive if it did that. :-)
3794   _draining_satb_buffers = true;
3795
3796   CMObjectClosure oc(this);
3797   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3798   if (G1CollectedHeap::use_parallel_gc_threads()) {
3799     satb_mq_set.set_par_closure(_worker_id, &oc);
3800   } else {
3801     satb_mq_set.set_closure(&oc);
3802   }
3803
3804   // This keeps claiming and applying the closure to completed buffers
3805   // until we run out of buffers or we need to abort.
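  // The two loops below differ only in how a completed buffer is
  // claimed (par vs. non-par). Each buffer processed is followed by
  // a regular_clock_call(), so a long backlog of buffers cannot
  // starve the abort checks.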
3806 if (G1CollectedHeap::use_parallel_gc_threads()) { 3807 while (!has_aborted() && 3808 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3809 if (_cm->verbose_medium()) { 3810 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3811 } 3812 statsOnly( ++_satb_buffers_processed ); 3813 regular_clock_call(); 3814 } 3815 } else { 3816 while (!has_aborted() && 3817 satb_mq_set.apply_closure_to_completed_buffer()) { 3818 if (_cm->verbose_medium()) { 3819 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3820 } 3821 statsOnly( ++_satb_buffers_processed ); 3822 regular_clock_call(); 3823 } 3824 } 3825 3826 if (!concurrent() && !has_aborted()) { 3827 // We should only do this during remark. 3828 if (G1CollectedHeap::use_parallel_gc_threads()) { 3829 satb_mq_set.par_iterate_closure_all_threads(_worker_id); 3830 } else { 3831 satb_mq_set.iterate_closure_all_threads(); 3832 } 3833 } 3834 3835 _draining_satb_buffers = false; 3836 3837 assert(has_aborted() || 3838 concurrent() || 3839 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3840 3841 if (G1CollectedHeap::use_parallel_gc_threads()) { 3842 satb_mq_set.set_par_closure(_worker_id, NULL); 3843 } else { 3844 satb_mq_set.set_closure(NULL); 3845 } 3846 3847 // again, this was a potentially expensive operation, decrease the 3848 // limits to get the regular clock call early 3849 decrease_limits(); 3850 } 3851 3852 void CMTask::print_stats() { 3853 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3854 _worker_id, _calls); 3855 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3856 _elapsed_time_ms, _termination_time_ms); 3857 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3858 _step_times_ms.num(), _step_times_ms.avg(), 3859 _step_times_ms.sd()); 3860 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3861 _step_times_ms.maximum(), _step_times_ms.sum()); 3862 3863 #if _MARKING_STATS_ 3864 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3865 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3866 _all_clock_intervals_ms.sd()); 3867 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3868 _all_clock_intervals_ms.maximum(), 3869 _all_clock_intervals_ms.sum()); 3870 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3871 _clock_due_to_scanning, _clock_due_to_marking); 3872 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3873 _objs_scanned, _objs_found_on_bitmap); 3874 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3875 _local_pushes, _local_pops, _local_max_size); 3876 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3877 _global_pushes, _global_pops, _global_max_size); 3878 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3879 _global_transfers_to,_global_transfers_from); 3880 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3881 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3882 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3883 _steal_attempts, _steals); 3884 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3885 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3886 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3887 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3888 _aborted_timed_out, 
                          _aborted_satb, _aborted_termination);
3889 #endif // _MARKING_STATS_
3890 }
3891
3892 /*****************************************************************************
3893
3894     The do_marking_step(time_target_ms) method is the building block
3895     of the parallel marking framework. It can be called in parallel
3896     with other invocations of do_marking_step() on different tasks
3897     (but only one per task, obviously) and concurrently with the
3898     mutator threads, or during remark, hence it eliminates the need
3899     for two versions of the code. When called during remark, it will
3900     pick up from where the task left off during the concurrent marking
3901     phase. Interestingly, tasks are claimable during evacuation
3902     pauses too, since do_marking_step() ensures that it aborts before
3903     it needs to yield.
3904
3905     The data structures that it uses to do marking work are the
3906     following:
3907
3908     (1) Marking Bitmap. If there are gray objects that appear only
3909     on the bitmap (this happens either when dealing with an overflow
3910     or when the initial marking phase has simply marked the roots
3911     and didn't push them on the stack), then tasks claim heap
3912     regions whose bitmap they then scan to find gray objects. A
3913     global finger indicates where the end of the last claimed region
3914     is. A local finger indicates how far into the region a task has
3915     scanned. The two fingers are used to determine how to gray an
3916     object (i.e. whether simply marking it is OK, as it will be
3917     visited by a task in the future, or whether it also needs to be
3918     pushed on a stack).
3919
3920     (2) Local Queue. The local queue of the task, which the task
3921     can access reasonably efficiently. Other tasks can steal from
3922     it when they run out of work. Throughout the marking phase, a
3923     task attempts to keep its local queue short but not totally
3924     empty, so that entries are available for stealing by other
3925     tasks. Only when there is no more work does a task totally
3926     drain its local queue.
3927
3928     (3) Global Mark Stack. This handles local queue overflow. During
3929     marking only sets of entries are moved between it and the local
3930     queues, as access to it requires a mutex and more fine-grained
3931     interaction with it might cause contention. If it
3932     overflows, then the marking phase should restart and iterate
3933     over the bitmap to identify gray objects. Throughout the marking
3934     phase, tasks attempt to keep the global mark stack at a small
3935     length but not totally empty, so that entries are available for
3936     popping by other tasks. Only when there is no more work do tasks
3937     totally drain the global mark stack.
3938
3939     (4) SATB Buffer Queue. This is where completed SATB buffers are
3940     made available. Buffers are regularly removed from this queue
3941     and scanned for roots, so that the queue doesn't get too
3942     long. During remark, all completed buffers are processed, as
3943     well as the filled-in parts of any uncompleted buffers.
3944
3945     The do_marking_step() method tries to abort when the time target
3946     has been reached. There are a few other cases when the
3947     do_marking_step() method also aborts:
3948
3949     (1) When the marking phase has been aborted (after a Full GC).
3950
3951     (2) When a global overflow (on the global stack) has been
3952     triggered. Before the task aborts, it will actually sync up with
3953     the other tasks to ensure that all the marking data structures
3954     (local queues, stacks, fingers etc.) are re-initialised so that
3955     when do_marking_step() completes, the marking phase can
3956     immediately restart.
3957
3958     (3) When enough completed SATB buffers are available. The
3959     do_marking_step() method only tries to drain SATB buffers right
3960     at the beginning. So, if enough buffers are available, the
3961     marking step aborts and the SATB buffers are processed at
3962     the beginning of the next invocation.
3963
3964     (4) To yield. When we have to yield, we abort and yield
3965     right at the end of do_marking_step(). This saves us a lot
3966     of hassle as, by yielding, we might allow a Full GC. If this
3967     happens then objects will be compacted underneath our feet, the
3968     heap might shrink, etc. We save checking for this by just
3969     aborting and doing the yield right at the end.
3970
3971     From the above it follows that the do_marking_step() method should
3972     be called in a loop (or, otherwise, regularly) until it completes.
3973
3974     If a marking step completes without its has_aborted() flag being
3975     true, it means it has completed the current marking phase (and
3976     also all other marking tasks have done so and have all synced up).
3977
3978     A method called regular_clock_call() is invoked "regularly" (in
3979     sub-ms intervals) throughout marking. It is this clock method that
3980     checks all the abort conditions which were mentioned above and
3981     decides when the task should abort. A work-based scheme is used to
3982     trigger this clock method: when the number of object words the
3983     marking phase has scanned or the number of references the marking
3984     phase has visited reach a given limit. Additional invocations of
3985     the clock method have been planted in a few other strategic places
3986     too. The initial reason for the clock method was to avoid calling
3987     vtime too regularly, as it is quite expensive. So, once it was in
3988     place, it was natural to piggy-back all the other conditions on it
3989     too and not constantly check them throughout the code.
3990
3991 *****************************************************************************/
3992
3993 void CMTask::do_marking_step(double time_target_ms,
3994                              bool do_stealing,
3995                              bool do_termination) {
3996   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3997   assert(concurrent() == _cm->concurrent(), "they should be the same");
3998
3999   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4000   assert(_task_queues != NULL, "invariant");
4001   assert(_task_queue != NULL, "invariant");
4002   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4003
4004   assert(!_claimed,
4005          "only one thread should claim this task at any one time");
4006
4007   // OK, this doesn't safeguard against all possible scenarios, as it is
4008   // possible for two threads to set the _claimed flag at the same
4009   // time. But it is only for debugging purposes anyway and it will
4010   // catch most problems.
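  // Illustrative sketch (not part of the build): as the block comment
  // above says, a caller is expected to drive this method in a loop
  // until the step completes without aborting, along these lines
  // (task and cm stand for a CMTask* and its ConcurrentMark*; the
  // 10ms target is just an example value):
  //
  //   do {
  //     task->do_marking_step(10.0 /* time_target_ms */,
  //                           true /* do_stealing */,
  //                           true /* do_termination */);
  //     // ...yield to a pending safepoint and/or handle overflow...
  //   } while (task->has_aborted() && !cm->has_aborted());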
4011 _claimed = true; 4012 4013 _start_time_ms = os::elapsedVTime() * 1000.0; 4014 statsOnly( _interval_start_time_ms = _start_time_ms ); 4015 4016 double diff_prediction_ms = 4017 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 4018 _time_target_ms = time_target_ms - diff_prediction_ms; 4019 4020 // set up the variables that are used in the work-based scheme to 4021 // call the regular clock method 4022 _words_scanned = 0; 4023 _refs_reached = 0; 4024 recalculate_limits(); 4025 4026 // clear all flags 4027 clear_has_aborted(); 4028 _has_timed_out = false; 4029 _draining_satb_buffers = false; 4030 4031 ++_calls; 4032 4033 if (_cm->verbose_low()) { 4034 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, " 4035 "target = %1.2lfms >>>>>>>>>>", 4036 _worker_id, _calls, _time_target_ms); 4037 } 4038 4039 // Set up the bitmap and oop closures. Anything that uses them is 4040 // eventually called from this method, so it is OK to allocate these 4041 // statically. 4042 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 4043 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 4044 set_cm_oop_closure(&cm_oop_closure); 4045 4046 if (_cm->has_overflown()) { 4047 // This can happen if the mark stack overflows during a GC pause 4048 // and this task, after a yield point, restarts. We have to abort 4049 // as we need to get into the overflow protocol which happens 4050 // right at the end of this task. 4051 set_has_aborted(); 4052 } 4053 4054 // First drain any available SATB buffers. After this, we will not 4055 // look at SATB buffers before the next invocation of this method. 4056 // If enough completed SATB buffers are queued up, the regular clock 4057 // will abort this task so that it restarts. 4058 drain_satb_buffers(); 4059 // ...then partially drain the local queue and the global stack 4060 drain_local_queue(true); 4061 drain_global_stack(true); 4062 4063 do { 4064 if (!has_aborted() && _curr_region != NULL) { 4065 // This means that we're already holding on to a region. 4066 assert(_finger != NULL, "if region is not NULL, then the finger " 4067 "should not be NULL either"); 4068 4069 // We might have restarted this task after an evacuation pause 4070 // which might have evacuated the region we're holding on to 4071 // underneath our feet. Let's read its limit again to make sure 4072 // that we do not iterate over a region of the heap that 4073 // contains garbage (update_region_limit() will also move 4074 // _finger to the start of the region if it is found empty). 4075 update_region_limit(); 4076 // We will start from _finger not from the start of the region, 4077 // as we might be restarting this task after aborting half-way 4078 // through scanning this region. In this case, _finger points to 4079 // the address where we last found a marked object. If this is a 4080 // fresh region, _finger points to start(). 4081 MemRegion mr = MemRegion(_finger, _region_limit); 4082 4083 if (_cm->verbose_low()) { 4084 gclog_or_tty->print_cr("[%u] we're scanning part " 4085 "["PTR_FORMAT", "PTR_FORMAT") " 4086 "of region "PTR_FORMAT, 4087 _worker_id, _finger, _region_limit, _curr_region); 4088 } 4089 4090 // Let's iterate over the bitmap of the part of the 4091 // region that is left. 4092 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4093 // We successfully completed iterating over the region. Now, 4094 // let's give up the region. 
4095 giveup_current_region(); 4096 regular_clock_call(); 4097 } else { 4098 assert(has_aborted(), "currently the only way to do so"); 4099 // The only way to abort the bitmap iteration is to return 4100 // false from the do_bit() method. However, inside the 4101 // do_bit() method we move the _finger to point to the 4102 // object currently being looked at. So, if we bail out, we 4103 // have definitely set _finger to something non-null. 4104 assert(_finger != NULL, "invariant"); 4105 4106 // Region iteration was actually aborted. So now _finger 4107 // points to the address of the object we last scanned. If we 4108 // leave it there, when we restart this task, we will rescan 4109 // the object. It is easy to avoid this. We move the finger by 4110 // enough to point to the next possible object header (the 4111 // bitmap knows by how much we need to move it as it knows its 4112 // granularity). 4113 assert(_finger < _region_limit, "invariant"); 4114 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4115 // Check if bitmap iteration was aborted while scanning the last object 4116 if (new_finger >= _region_limit) { 4117 giveup_current_region(); 4118 } else { 4119 move_finger_to(new_finger); 4120 } 4121 } 4122 } 4123 // At this point we have either completed iterating over the 4124 // region we were holding on to, or we have aborted. 4125 4126 // We then partially drain the local queue and the global stack. 4127 // (Do we really need this?) 4128 drain_local_queue(true); 4129 drain_global_stack(true); 4130 4131 // Read the note on the claim_region() method on why it might 4132 // return NULL with potentially more regions available for 4133 // claiming and why we have to check out_of_regions() to determine 4134 // whether we're done or not. 4135 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4136 // We are going to try to claim a new region. We should have 4137 // given up on the previous one. 4138 // Separated the asserts so that we know which one fires. 4139 assert(_curr_region == NULL, "invariant"); 4140 assert(_finger == NULL, "invariant"); 4141 assert(_region_limit == NULL, "invariant"); 4142 if (_cm->verbose_low()) { 4143 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4144 } 4145 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4146 if (claimed_region != NULL) { 4147 // Yes, we managed to claim one 4148 statsOnly( ++_regions_claimed ); 4149 4150 if (_cm->verbose_low()) { 4151 gclog_or_tty->print_cr("[%u] we successfully claimed " 4152 "region "PTR_FORMAT, 4153 _worker_id, claimed_region); 4154 } 4155 4156 setup_for_region(claimed_region); 4157 assert(_curr_region == claimed_region, "invariant"); 4158 } 4159 // It is important to call the regular clock here. It might take 4160 // a while to claim a region if, for example, we hit a large 4161 // block of empty regions. So we need to call the regular clock 4162 // method once round the loop to make sure it's called 4163 // frequently enough. 4164 regular_clock_call(); 4165 } 4166 4167 if (!has_aborted() && _curr_region == NULL) { 4168 assert(_cm->out_of_regions(), 4169 "at this point we should be out of regions"); 4170 } 4171 } while ( _curr_region != NULL && !has_aborted()); 4172 4173 if (!has_aborted()) { 4174 // We cannot check whether the global stack is empty, since other 4175 // tasks might be pushing objects to it concurrently. 
4176     assert(_cm->out_of_regions(),
4177            "at this point we should be out of regions");
4178
4179     if (_cm->verbose_low()) {
4180       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4181     }
4182
4183     // Try to reduce the number of available SATB buffers so that
4184     // remark has less work to do.
4185     drain_satb_buffers();
4186   }
4187
4188   // Since we've done everything else, we can now totally drain the
4189   // local queue and global stack.
4190   drain_local_queue(false);
4191   drain_global_stack(false);
4192
4193   // Attempt at work stealing from other tasks' queues.
4194   if (do_stealing && !has_aborted()) {
4195     // We have not aborted. This means that we have finished all that
4196     // we could. Let's try to do some stealing...
4197
4198     // We cannot check whether the global stack is empty, since other
4199     // tasks might be pushing objects to it concurrently.
4200     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4201            "only way to reach here");
4202
4203     if (_cm->verbose_low()) {
4204       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4205     }
4206
4207     while (!has_aborted()) {
4208       oop obj;
4209       statsOnly( ++_steal_attempts );
4210
4211       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4212         if (_cm->verbose_medium()) {
4213           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4214                                  _worker_id, (void*) obj);
4215         }
4216
4217         statsOnly( ++_steals );
4218
4219         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4220                "any stolen object should be marked");
4221         scan_object(obj);
4222
4223         // And since we're towards the end, let's totally drain the
4224         // local queue and global stack.
4225         drain_local_queue(false);
4226         drain_global_stack(false);
4227       } else {
4228         break;
4229       }
4230     }
4231   }
4232
4233   // If we are about to wrap up and go into termination, check if we
4234   // should raise the overflow flag.
4235   if (do_termination && !has_aborted()) {
4236     if (_cm->force_overflow()->should_force()) {
4237       _cm->set_has_overflown();
4238       regular_clock_call();
4239     }
4240   }
4241
4242   // We still haven't aborted. Now, let's try to get into the
4243   // termination protocol.
4244   if (do_termination && !has_aborted()) {
4245     // We cannot check whether the global stack is empty, since other
4246     // tasks might be concurrently pushing objects on it.
4247     // Separated the asserts so that we know which one fires.
4248     assert(_cm->out_of_regions(), "only way to reach here");
4249     assert(_task_queue->size() == 0, "only way to reach here");
4250
4251     if (_cm->verbose_low()) {
4252       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4253     }
4254
4255     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4256     // The CMTask class also extends the TerminatorTerminator class,
4257     // hence its should_exit_termination() method will also decide
4258     // whether to exit the termination protocol or not.
4259     bool finished = _cm->terminator()->offer_termination(this);
4260     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4261     _termination_time_ms +=
4262       termination_end_time_ms - _termination_start_time_ms;
4263
4264     if (finished) {
4265       // We're all done.
4266
4267       if (_worker_id == 0) {
4268         // let's allow task 0 to do this
4269         if (concurrent()) {
4270           assert(_cm->concurrent_marking_in_progress(), "invariant");
4271           // we need to set this to false before the next
4272           // safepoint. This way we ensure that the marking phase
4273           // doesn't observe any more heap expansions.
4274           _cm->clear_concurrent_marking_in_progress();
4275         }
4276       }
4277
4278       // We can now guarantee that the global stack is empty, since
4279       // all other tasks have finished. We separated the guarantees so
4280       // that, if a condition is false, we can immediately find out
4281       // which one.
4282       guarantee(_cm->out_of_regions(), "only way to reach here");
4283       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4284       guarantee(_task_queue->size() == 0, "only way to reach here");
4285       guarantee(!_cm->has_overflown(), "only way to reach here");
4286       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4287
4288       if (_cm->verbose_low()) {
4289         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4290       }
4291     } else {
4292       // Apparently there's more work to do. Let's abort this task; it
4293       // will be restarted and we can hopefully find more things to do.
4294
4295       if (_cm->verbose_low()) {
4296         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4297                                _worker_id);
4298       }
4299
4300       set_has_aborted();
4301       statsOnly( ++_aborted_termination );
4302     }
4303   }
4304
4305   // Mainly for debugging purposes to make sure that a pointer to the
4306   // closure which was statically allocated in this frame doesn't
4307   // escape it by accident.
4308   set_cm_oop_closure(NULL);
4309   double end_time_ms = os::elapsedVTime() * 1000.0;
4310   double elapsed_time_ms = end_time_ms - _start_time_ms;
4311   // Update the step history.
4312   _step_times_ms.add(elapsed_time_ms);
4313
4314   if (has_aborted()) {
4315     // The task was aborted for some reason.
4316
4317     statsOnly( ++_aborted );
4318
4319     if (_has_timed_out) {
4320       double diff_ms = elapsed_time_ms - _time_target_ms;
4321       // Keep statistics of how well we did with respect to hitting
4322       // our target only if we actually timed out (if we aborted for
4323       // other reasons, then the results might get skewed).
4324       _marking_step_diffs_ms.add(diff_ms);
4325     }
4326
4327     if (_cm->has_overflown()) {
4328       // This is the interesting one. We aborted because a global
4329       // overflow was raised. This means we have to restart the
4330       // marking phase and start iterating over regions. However, in
4331       // order to do this we have to make sure that all tasks stop
4332       // what they are doing and re-initialise in a safe manner. We
4333       // will achieve this with the use of two barrier sync points.
4334
4335       if (_cm->verbose_low()) {
4336         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4337       }
4338
4339       _cm->enter_first_sync_barrier(_worker_id);
4340       // When we exit this sync barrier we know that all tasks have
4341       // stopped doing marking work. So, it's now safe to
4342       // re-initialise our data structures. At the end of this method,
4343       // task 0 will clear the global data structures.
4344
4345       statsOnly( ++_aborted_overflow );
4346
4347       // We clear the local state of this task...
4348       clear_region_fields();
4349
4350       // ...and enter the second barrier.
4351       _cm->enter_second_sync_barrier(_worker_id);
4352       // At this point everything has been re-initialised and we're
4353       // ready to restart.
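      // (Two barriers are needed: the first guarantees that no task is
      // still doing marking work while state is being reset; the second
      // guarantees that no task restarts before the re-initialisation,
      // including the global reset mentioned above, has completed.)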
4354 } 4355 4356 if (_cm->verbose_low()) { 4357 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4358 "elapsed = %1.2lfms <<<<<<<<<<", 4359 _worker_id, _time_target_ms, elapsed_time_ms); 4360 if (_cm->has_aborted()) { 4361 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4362 _worker_id); 4363 } 4364 } 4365 } else { 4366 if (_cm->verbose_low()) { 4367 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4368 "elapsed = %1.2lfms <<<<<<<<<<", 4369 _worker_id, _time_target_ms, elapsed_time_ms); 4370 } 4371 } 4372 4373 _claimed = false; 4374 } 4375 4376 CMTask::CMTask(uint worker_id, 4377 ConcurrentMark* cm, 4378 size_t* marked_bytes, 4379 BitMap* card_bm, 4380 CMTaskQueue* task_queue, 4381 CMTaskQueueSet* task_queues) 4382 : _g1h(G1CollectedHeap::heap()), 4383 _worker_id(worker_id), _cm(cm), 4384 _claimed(false), 4385 _nextMarkBitMap(NULL), _hash_seed(17), 4386 _task_queue(task_queue), 4387 _task_queues(task_queues), 4388 _cm_oop_closure(NULL), 4389 _marked_bytes_array(marked_bytes), 4390 _card_bm(card_bm) { 4391 guarantee(task_queue != NULL, "invariant"); 4392 guarantee(task_queues != NULL, "invariant"); 4393 4394 statsOnly( _clock_due_to_scanning = 0; 4395 _clock_due_to_marking = 0 ); 4396 4397 _marking_step_diffs_ms.add(0.5); 4398 } 4399 4400 // These are formatting macros that are used below to ensure 4401 // consistent formatting. The *_H_* versions are used to format the 4402 // header for a particular value and they should be kept consistent 4403 // with the corresponding macro. Also note that most of the macros add 4404 // the necessary white space (as a prefix) which makes them a bit 4405 // easier to compose. 4406 4407 // All the output lines are prefixed with this string to be able to 4408 // identify them easily in a large log file. 4409 #define G1PPRL_LINE_PREFIX "###" 4410 4411 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4412 #ifdef _LP64 4413 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4414 #else // _LP64 4415 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4416 #endif // _LP64 4417 4418 // For per-region info 4419 #define G1PPRL_TYPE_FORMAT " %-4s" 4420 #define G1PPRL_TYPE_H_FORMAT " %4s" 4421 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4422 #define G1PPRL_BYTE_H_FORMAT " %9s" 4423 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4424 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4425 4426 // For summary info 4427 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4428 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4429 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4430 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4431 4432 G1PrintRegionLivenessInfoClosure:: 4433 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4434 : _out(out), 4435 _total_used_bytes(0), _total_capacity_bytes(0), 4436 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4437 _hum_used_bytes(0), _hum_capacity_bytes(0), 4438 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) { 4439 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4440 MemRegion g1_committed = g1h->g1_committed(); 4441 MemRegion g1_reserved = g1h->g1_reserved(); 4442 double now = os::elapsedTime(); 4443 4444 // Print the header of the output. 
4445   _out->cr();
4446   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4447   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4448                  G1PPRL_SUM_ADDR_FORMAT("committed")
4449                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4450                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4451                  g1_committed.start(), g1_committed.end(),
4452                  g1_reserved.start(), g1_reserved.end(),
4453                  HeapRegion::GrainBytes);
4454   _out->print_cr(G1PPRL_LINE_PREFIX);
4455   _out->print_cr(G1PPRL_LINE_PREFIX
4456                  G1PPRL_TYPE_H_FORMAT
4457                  G1PPRL_ADDR_BASE_H_FORMAT
4458                  G1PPRL_BYTE_H_FORMAT
4459                  G1PPRL_BYTE_H_FORMAT
4460                  G1PPRL_BYTE_H_FORMAT
4461                  G1PPRL_DOUBLE_H_FORMAT,
4462                  "type", "address-range",
4463                  "used", "prev-live", "next-live", "gc-eff");
4464   _out->print_cr(G1PPRL_LINE_PREFIX
4465                  G1PPRL_TYPE_H_FORMAT
4466                  G1PPRL_ADDR_BASE_H_FORMAT
4467                  G1PPRL_BYTE_H_FORMAT
4468                  G1PPRL_BYTE_H_FORMAT
4469                  G1PPRL_BYTE_H_FORMAT
4470                  G1PPRL_DOUBLE_H_FORMAT,
4471                  "", "",
4472                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4473 }
4474
4475 // It takes as a parameter a reference to one of the _hum_* fields; it
4476 // deduces the corresponding value for a region in a humongous region
4477 // series (either the region size, or what's left if the _hum_* field
4478 // is < the region size), and updates the _hum_* field accordingly.
4479 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4480   size_t bytes = 0;
4481   // The > 0 check is to deal with the prev and next live bytes which
4482   // could be 0.
4483   if (*hum_bytes > 0) {
4484     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4485     *hum_bytes -= bytes;
4486   }
4487   return bytes;
4488 }
4489
4490 // It deduces the values for a region in a humongous region series
4491 // from the _hum_* fields and updates those accordingly. It assumes
4492 // that the _hum_* fields have already been set up from the "starts
4493 // humongous" region and that we visit the regions in address order.
4494 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4495                                                      size_t* capacity_bytes,
4496                                                      size_t* prev_live_bytes,
4497                                                      size_t* next_live_bytes) {
4498   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4499   *used_bytes = get_hum_bytes(&_hum_used_bytes);
4500   *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4501   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4502   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4503 }
4504
4505 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4506   const char* type = "";
4507   HeapWord* bottom = r->bottom();
4508   HeapWord* end = r->end();
4509   size_t capacity_bytes = r->capacity();
4510   size_t used_bytes = r->used();
4511   size_t prev_live_bytes = r->live_bytes();
4512   size_t next_live_bytes = r->next_live_bytes();
4513   double gc_eff = r->gc_efficiency();
4514   if (r->used() == 0) {
4515     type = "FREE";
4516   } else if (r->is_survivor()) {
4517     type = "SURV";
4518   } else if (r->is_young()) {
4519     type = "EDEN";
4520   } else if (r->startsHumongous()) {
4521     type = "HUMS";
4522
4523     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4524            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4525            "they should have been zeroed after the last time we used them");
4526     // Set up the _hum_* fields.
4527 _hum_capacity_bytes = capacity_bytes; 4528 _hum_used_bytes = used_bytes; 4529 _hum_prev_live_bytes = prev_live_bytes; 4530 _hum_next_live_bytes = next_live_bytes; 4531 get_hum_bytes(&used_bytes, &capacity_bytes, 4532 &prev_live_bytes, &next_live_bytes); 4533 end = bottom + HeapRegion::GrainWords; 4534 } else if (r->continuesHumongous()) { 4535 type = "HUMC"; 4536 get_hum_bytes(&used_bytes, &capacity_bytes, 4537 &prev_live_bytes, &next_live_bytes); 4538 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4539 } else { 4540 type = "OLD"; 4541 } 4542 4543 _total_used_bytes += used_bytes; 4544 _total_capacity_bytes += capacity_bytes; 4545 _total_prev_live_bytes += prev_live_bytes; 4546 _total_next_live_bytes += next_live_bytes; 4547 4548 // Print a line for this particular region. 4549 _out->print_cr(G1PPRL_LINE_PREFIX 4550 G1PPRL_TYPE_FORMAT 4551 G1PPRL_ADDR_BASE_FORMAT 4552 G1PPRL_BYTE_FORMAT 4553 G1PPRL_BYTE_FORMAT 4554 G1PPRL_BYTE_FORMAT 4555 G1PPRL_DOUBLE_FORMAT, 4556 type, bottom, end, 4557 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4558 4559 return false; 4560 } 4561 4562 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4563 // Print the footer of the output. 4564 _out->print_cr(G1PPRL_LINE_PREFIX); 4565 _out->print_cr(G1PPRL_LINE_PREFIX 4566 " SUMMARY" 4567 G1PPRL_SUM_MB_FORMAT("capacity") 4568 G1PPRL_SUM_MB_PERC_FORMAT("used") 4569 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4570 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4571 bytes_to_mb(_total_capacity_bytes), 4572 bytes_to_mb(_total_used_bytes), 4573 perc(_total_used_bytes, _total_capacity_bytes), 4574 bytes_to_mb(_total_prev_live_bytes), 4575 perc(_total_prev_live_bytes, _total_capacity_bytes), 4576 bytes_to_mb(_total_next_live_bytes), 4577 perc(_total_next_live_bytes, _total_capacity_bytes)); 4578 _out->cr(); 4579 }
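
// Illustrative sketch (not part of the build): the closure above is
// meant to be stack-allocated around a single pass over the heap; the
// constructor prints the header and the destructor prints the summary
// footer. A caller would look roughly like this (the guard flag and
// phase name are just examples):
//
//   if (G1PrintRegionLivenessInfo) {
//     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//     G1CollectedHeap::heap()->heap_region_iterate(&cl);
//   }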