/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
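  // (A worked example of the sizing above, assuming 64-bit HeapWords and
  // _shifter == 0, i.e. one bitmap bit per heap word: a 1G space is 2^27
  // words, so the bitmap needs (2^27 >> LogBitsPerByte) + 1 = 16M + 1
  // bytes, rounded up to the reservation alignment.)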
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  // Note: intersection() is const and returns the clipped region, so the
  // result must be assigned back for the clipping to take effect.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
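
// Illustrative (hypothetical) use of getAndClearMarkedRegion(): a caller
// could consume and clear the marked ranges of a section one at a time,
// e.g.
//
//   MemRegion mr = bm->getAndClearMarkedRegion(cur, end);
//   while (!mr.is_empty()) {
//     process(mr);   // hypothetical consumer
//     mr = bm->getAndClearMarkedRegion(mr.end(), end);
//   }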

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
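  // We hold ParGCRareEvent_lock here (unlike the CAS-based par_push()
  // and par_adjoin_arr() above), so it appears safe to publish the new
  // _index before filling in the slots: par_pop_arr() takes the same
  // lock and therefore cannot observe a half-filled window.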
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
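  // Double-checked locking: read _next_survivor optimistically without
  // the lock first; if it still looks claimable, re-read it under
  // RootRegionScan_lock before advancing it, since another worker may
  // have claimed it in the meantime.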
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
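  // For example, assuming 512-byte cards (card_shift == 9): an address
  // addr maps to biased card index
  //   (addr >> card_shift) - _heap_bottom_card_num,
  // so the first card of the heap lands on index 0 of each bitmap.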
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = (uint) ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (uint) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. Any future expansions of g1_committed that CM is notified
  // of will happen at the end of evacuation pauses, when tasks are
  // inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_task_num; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
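  // (References discovered from here on are not processed until the
  // remark pause - see weakRefsWork(), called from
  // checkpointRootsFinal(); during the concurrent phase they are only
  // discovered and recorded.)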
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for a Full GC or an evacuation pause to occur while it
 * is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
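    // Concretely: when marking concurrently, clear_overflow == true and
    // the tasks simply restart after the barriers; during remark (STW),
    // clear_overflow == false so that has_overflown() remains set and
    // checkpointRootsFinal() can see it and schedule the restart of
    // concurrent marking.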
    reset_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(),
"invariant"); 987 988 ConcurrentGCThread::stsLeave(); 989 990 double end_vtime = os::elapsedVTime(); 991 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 992 } 993 994 CMConcurrentMarkingTask(ConcurrentMark* cm, 995 ConcurrentMarkThread* cmt) : 996 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 997 998 ~CMConcurrentMarkingTask() { } 999 }; 1000 1001 // Calculates the number of active workers for a concurrent 1002 // phase. 1003 uint ConcurrentMark::calc_parallel_marking_threads() { 1004 if (G1CollectedHeap::use_parallel_gc_threads()) { 1005 uint n_conc_workers = 0; 1006 if (!UseDynamicNumberOfGCThreads || 1007 (!FLAG_IS_DEFAULT(ConcGCThreads) && 1008 !ForceDynamicNumberOfGCThreads)) { 1009 n_conc_workers = max_parallel_marking_threads(); 1010 } else { 1011 n_conc_workers = 1012 AdaptiveSizePolicy::calc_default_active_workers( 1013 max_parallel_marking_threads(), 1014 1, /* Minimum workers */ 1015 parallel_marking_threads(), 1016 Threads::number_of_non_daemon_threads()); 1017 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 1018 // that scaling has already gone into "_max_parallel_marking_threads". 1019 } 1020 assert(n_conc_workers > 0, "Always need at least 1"); 1021 return n_conc_workers; 1022 } 1023 // If we are not running with any parallel GC threads we will not 1024 // have spawned any marking threads either. Hence the number of 1025 // concurrent workers should be 0. 1026 return 0; 1027 } 1028 1029 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) { 1030 // Currently, only survivors can be root regions. 1031 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 1032 G1RootRegionScanClosure cl(_g1h, this, worker_id); 1033 1034 const uintx interval = PrefetchScanIntervalInBytes; 1035 HeapWord* curr = hr->bottom(); 1036 const HeapWord* end = hr->top(); 1037 while (curr < end) { 1038 Prefetch::read(curr, interval); 1039 oop obj = oop(curr); 1040 int size = obj->oop_iterate(&cl); 1041 assert(size == obj->size(), "sanity"); 1042 curr += size; 1043 } 1044 } 1045 1046 class CMRootRegionScanTask : public AbstractGangTask { 1047 private: 1048 ConcurrentMark* _cm; 1049 1050 public: 1051 CMRootRegionScanTask(ConcurrentMark* cm) : 1052 AbstractGangTask("Root Region Scan"), _cm(cm) { } 1053 1054 void work(uint worker_id) { 1055 assert(Thread::current()->is_ConcurrentGC_thread(), 1056 "this should only be done by a conc GC thread"); 1057 1058 CMRootRegions* root_regions = _cm->root_regions(); 1059 HeapRegion* hr = root_regions->claim_next(); 1060 while (hr != NULL) { 1061 _cm->scanRootRegion(hr, worker_id); 1062 hr = root_regions->claim_next(); 1063 } 1064 } 1065 }; 1066 1067 void ConcurrentMark::scanRootRegions() { 1068 // scan_in_progress() will have been set to true only if there was 1069 // at least one root region to scan. So, if it's false, we 1070 // should not attempt to do any further work. 
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
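// In outline, for each region that is not a "continues humongous" it:
//   1. walks the marked objects in [bottom, NTAMS) on the supplied bitmap,
//   2. accumulates their sizes into _region_marked_bytes,
//   3. sets the bits for the cards they span in the card bitmap, and
//   4. sets the region's bit (or bit range, for "starts humongous")
//      in the region bitmap.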
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in the heap, obj_end
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
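    // (The converse - the actual bit set while the expected bit is
    // clear - is not treated as a failure; the accumulated data is
    // allowed to be a conservative superset of what this pass
    // recomputes.)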
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};


class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert(_g1h->workers()->active_workers() > 0,
             "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top)
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region
// containing live data in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     end_idx, _card_bm->size()));
      assert(start_idx < _card_bm->size(),
             err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     start_idx, _card_bm->size()));

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert(_g1h->workers()->active_workers() > 0,
             "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  uint _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  OldRegionSet* _old_proxy_set;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_proxy_set(old_proxy_set),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->continuesHumongous()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
1695 _g1->reset_gc_time_stamps(hr); 1696 double start = os::elapsedTime(); 1697 _regions_claimed++; 1698 hr->note_end_of_marking(); 1699 _max_live_bytes += hr->max_live_bytes(); 1700 _g1->free_region_if_empty(hr, 1701 &_freed_bytes, 1702 _local_cleanup_list, 1703 _old_proxy_set, 1704 _humongous_proxy_set, 1705 _hrrs_cleanup_task, 1706 true /* par */); 1707 double region_time = (os::elapsedTime() - start); 1708 _claimed_region_time += region_time; 1709 if (region_time > _max_region_time) { 1710 _max_region_time = region_time; 1711 } 1712 return false; 1713 } 1714 1715 size_t max_live_bytes() { return _max_live_bytes; } 1716 uint regions_claimed() { return _regions_claimed; } 1717 double claimed_region_time_sec() { return _claimed_region_time; } 1718 double max_region_time_sec() { return _max_region_time; } 1719 }; 1720 1721 class G1ParNoteEndTask: public AbstractGangTask { 1722 friend class G1NoteEndOfConcMarkClosure; 1723 1724 protected: 1725 G1CollectedHeap* _g1h; 1726 size_t _max_live_bytes; 1727 size_t _freed_bytes; 1728 FreeRegionList* _cleanup_list; 1729 1730 public: 1731 G1ParNoteEndTask(G1CollectedHeap* g1h, 1732 FreeRegionList* cleanup_list) : 1733 AbstractGangTask("G1 note end"), _g1h(g1h), 1734 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1735 1736 void work(uint worker_id) { 1737 double start = os::elapsedTime(); 1738 FreeRegionList local_cleanup_list("Local Cleanup List"); 1739 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1740 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1741 HRRSCleanupTask hrrs_cleanup_task; 1742 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1743 &old_proxy_set, 1744 &humongous_proxy_set, 1745 &hrrs_cleanup_task); 1746 if (G1CollectedHeap::use_parallel_gc_threads()) { 1747 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1748 _g1h->workers()->active_workers(), 1749 HeapRegion::NoteEndClaimValue); 1750 } else { 1751 _g1h->heap_region_iterate(&g1_note_end); 1752 } 1753 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1754 1755 // Now update the lists 1756 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1757 NULL /* free_list */, 1758 &old_proxy_set, 1759 &humongous_proxy_set, 1760 true /* par */); 1761 { 1762 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1763 _max_live_bytes += g1_note_end.max_live_bytes(); 1764 _freed_bytes += g1_note_end.freed_bytes(); 1765 1766 // If we iterate over the global cleanup list at the end of 1767 // cleanup to do this printing we will not guarantee to only 1768 // generate output for the newly-reclaimed regions (the list 1769 // might not be empty at the beginning of cleanup; we might 1770 // still be working on its previous contents). So we do the 1771 // printing here, before we append the new regions to the global 1772 // cleanup list. 
1773 1774 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1775 if (hr_printer->is_active()) { 1776 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1777 while (iter.more_available()) { 1778 HeapRegion* hr = iter.get_next(); 1779 hr_printer->cleanup(hr); 1780 } 1781 } 1782 1783 _cleanup_list->add_as_tail(&local_cleanup_list); 1784 assert(local_cleanup_list.is_empty(), "post-condition"); 1785 1786 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1787 } 1788 } 1789 size_t max_live_bytes() { return _max_live_bytes; } 1790 size_t freed_bytes() { return _freed_bytes; } 1791 }; 1792 1793 class G1ParScrubRemSetTask: public AbstractGangTask { 1794 protected: 1795 G1RemSet* _g1rs; 1796 BitMap* _region_bm; 1797 BitMap* _card_bm; 1798 public: 1799 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1800 BitMap* region_bm, BitMap* card_bm) : 1801 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1802 _region_bm(region_bm), _card_bm(card_bm) { } 1803 1804 void work(uint worker_id) { 1805 if (G1CollectedHeap::use_parallel_gc_threads()) { 1806 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1807 HeapRegion::ScrubRemSetClaimValue); 1808 } else { 1809 _g1rs->scrub(_region_bm, _card_bm); 1810 } 1811 } 1812 1813 }; 1814 1815 void ConcurrentMark::cleanup() { 1816 // world is stopped at this checkpoint 1817 assert(SafepointSynchronize::is_at_safepoint(), 1818 "world should be stopped"); 1819 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1820 1821 // If a full collection has happened, we shouldn't do this. 1822 if (has_aborted()) { 1823 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1824 return; 1825 } 1826 1827 HRSPhaseSetter x(HRSPhaseCleanup); 1828 g1h->verify_region_sets_optional(); 1829 1830 if (VerifyDuringGC) { 1831 HandleMark hm; // handle scope 1832 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1833 Universe::heap()->prepare_for_verify(); 1834 Universe::verify(/* silent */ false, 1835 /* option */ VerifyOption_G1UsePrevMarking); 1836 } 1837 1838 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1839 g1p->record_concurrent_mark_cleanup_start(); 1840 1841 double start = os::elapsedTime(); 1842 1843 HeapRegionRemSet::reset_for_cleanup_tasks(); 1844 1845 uint n_workers; 1846 1847 // Do counting once more with the world stopped for good measure. 1848 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1849 1850 if (G1CollectedHeap::use_parallel_gc_threads()) { 1851 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1852 "sanity check"); 1853 1854 g1h->set_par_threads(); 1855 n_workers = g1h->n_par_threads(); 1856 assert(g1h->n_par_threads() == n_workers, 1857 "Should not have been reset"); 1858 g1h->workers()->run_task(&g1_par_count_task); 1859 // Done with the parallel phase so reset to 0. 1860 g1h->set_par_threads(0); 1861 1862 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1863 "sanity check"); 1864 } else { 1865 n_workers = 1; 1866 g1_par_count_task.work(0); 1867 } 1868 1869 if (VerifyDuringGC) { 1870 // Verify that the counting data accumulated during marking matches 1871 // that calculated by walking the marking bitmap. 
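    // Roughly (sketch): G1ParVerifyFinalCountTask recomputes the liveness
    // counts from the mark bitmap into the "expected" bitmaps created
    // below, VerifyLiveObjectDataHRClosure compares them region by region
    // against the "actual" counting data (_region_bm/_card_bm), each
    // worker's mismatch count is accumulated with Atomic::add, and the
    // guarantee() further down requires the total to be zero.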
1872
1873     // Bitmaps to hold expected values
1874     BitMap expected_region_bm(_region_bm.size(), false);
1875     BitMap expected_card_bm(_card_bm.size(), false);
1876
1877     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1878                                                  &_region_bm,
1879                                                  &_card_bm,
1880                                                  &expected_region_bm,
1881                                                  &expected_card_bm);
1882
1883     if (G1CollectedHeap::use_parallel_gc_threads()) {
1884       g1h->set_par_threads((int)n_workers);
1885       g1h->workers()->run_task(&g1_par_verify_task);
1886       // Done with the parallel phase so reset to 0.
1887       g1h->set_par_threads(0);
1888
1889       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1890              "sanity check");
1891     } else {
1892       g1_par_verify_task.work(0);
1893     }
1894
1895     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1896   }
1897
1898   size_t start_used_bytes = g1h->used();
1899   g1h->set_marking_complete();
1900
1901   double count_end = os::elapsedTime();
1902   double this_final_counting_time = (count_end - start);
1903   _total_counting_time += this_final_counting_time;
1904
1905   if (G1PrintRegionLivenessInfo) {
1906     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1907     _g1h->heap_region_iterate(&cl);
1908   }
1909
1910   // Install the newly created mark bitmap as "prev".
1911   swapMarkBitMaps();
1912
1913   g1h->reset_gc_time_stamp();
1914
1915   // Note end of marking in all heap regions.
1916   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1917   if (G1CollectedHeap::use_parallel_gc_threads()) {
1918     g1h->set_par_threads((int)n_workers);
1919     g1h->workers()->run_task(&g1_par_note_end_task);
1920     g1h->set_par_threads(0);
1921
1922     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1923            "sanity check");
1924   } else {
1925     g1_par_note_end_task.work(0);
1926   }
1927   g1h->check_gc_time_stamps();
1928
1929   if (!cleanup_list_is_empty()) {
1930     // The cleanup list is not empty, so we'll have to process it
1931     // concurrently. Notify anyone else that might be wanting free
1932     // regions that there will be more free regions coming soon.
1933     g1h->set_free_regions_coming();
1934   }
1935
1936   // If enabled, scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1937   // call below, since scrubbing affects the metric by which we sort the heap regions.
1938   if (G1ScrubRemSets) {
1939     double rs_scrub_start = os::elapsedTime();
1940     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1941     if (G1CollectedHeap::use_parallel_gc_threads()) {
1942       g1h->set_par_threads((int)n_workers);
1943       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1944       g1h->set_par_threads(0);
1945
1946       assert(g1h->check_heap_region_claim_values(
1947                                             HeapRegion::ScrubRemSetClaimValue),
1948              "sanity check");
1949     } else {
1950       g1_par_scrub_rs_task.work(0);
1951     }
1952
1953     double rs_scrub_end = os::elapsedTime();
1954     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1955     _total_rs_scrub_time += this_rs_scrub_time;
1956   }
1957
1958   // This will also free any regions totally full of garbage objects,
1959   // and sort the regions.
1960   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1961
1962   // Statistics.
1963   double end = os::elapsedTime();
1964   _cleanup_times.add((end - start) * 1000.0);
1965
1966   if (G1Log::fine()) {
1967     g1h->print_size_transition(gclog_or_tty,
1968                                start_used_bytes,
1969                                g1h->used(),
1970                                g1h->capacity());
1971   }
1972
1973   // Clean up will have freed any regions completely full of garbage.
1974   // Update the soft reference policy with the new heap occupancy.
1975   Universe::update_heap_info_at_gc();
1976
1977   // We need to make this be a "collection" so any collection pause that
1978   // races with it goes around and waits for completeCleanup to finish.
1979   g1h->increment_total_collections();
1980
1981   // We reclaimed old regions so we should calculate the sizes to make
1982   // sure we update the old gen/space data.
1983   g1h->g1mm()->update_sizes();
1984
1985   if (VerifyDuringGC) {
1986     HandleMark hm;  // handle scope
1987     gclog_or_tty->print(" VerifyDuringGC:(after)");
1988     Universe::heap()->prepare_for_verify();
1989     Universe::verify(/* silent */ false,
1990                      /* option */ VerifyOption_G1UsePrevMarking);
1991   }
1992
1993   g1h->verify_region_sets_optional();
1994   g1h->trace_heap_after_concurrent_cycle();
1995 }
1996
1997 void ConcurrentMark::completeCleanup() {
1998   if (has_aborted()) return;
1999
2000   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2001
2002   _cleanup_list.verify_optional();
2003   FreeRegionList tmp_free_list("Tmp Free List");
2004
2005   if (G1ConcRegionFreeingVerbose) {
2006     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2007                            "cleanup list has %u entries",
2008                            _cleanup_list.length());
2009   }
2010
2011   // No one else should be accessing the _cleanup_list at this point,
2012   // so it's not necessary to take any locks.
2013   while (!_cleanup_list.is_empty()) {
2014     HeapRegion* hr = _cleanup_list.remove_head();
2015     assert(hr != NULL, "the list was not empty");
2016     hr->par_clear();
2017     tmp_free_list.add_as_tail(hr);
2018
2019     // Instead of adding one region at a time to the secondary_free_list,
2020     // we accumulate them in the local list and move them a few at a
2021     // time. This also cuts down on the number of notify_all() calls
2022     // we do during this process. We'll also append the local list when
2023     // _cleanup_list is empty (which means we just removed the last
2024     // region from the _cleanup_list).
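    // For example (illustrative; G1SecondaryFreeListAppendLength is a
    // tunable flag): with an append length of 4 and 9 regions on the
    // cleanup list, the secondary free list receives three batches --
    // {r0..r3}, {r4..r7} and, once the cleanup list is empty, the
    // remaining {r8}.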
2025     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2026         _cleanup_list.is_empty()) {
2027       if (G1ConcRegionFreeingVerbose) {
2028         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2029                                "appending %u entries to the secondary_free_list, "
2030                                "cleanup list still has %u entries",
2031                                tmp_free_list.length(),
2032                                _cleanup_list.length());
2033       }
2034
2035       {
2036         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2037         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2038         SecondaryFreeList_lock->notify_all();
2039       }
2040
2041       if (G1StressConcRegionFreeing) {
2042         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2043           os::sleep(Thread::current(), (jlong) 1, false);
2044         }
2045       }
2046     }
2047   }
2048   assert(tmp_free_list.is_empty(), "post-condition");
2049 }
2050
2051 // Support closures for reference processing in G1
2052
2053 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2054   HeapWord* addr = (HeapWord*)obj;
2055   return addr != NULL &&
2056          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2057 }
2058
2059 class G1CMKeepAliveClosure: public OopClosure {
2060   G1CollectedHeap* _g1;
2061   ConcurrentMark*  _cm;
2062  public:
2063   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2064     _g1(g1), _cm(cm) {
2065     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2066   }
2067
2068   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2069   virtual void do_oop(      oop* p) { do_oop_work(p); }
2070
2071   template <class T> void do_oop_work(T* p) {
2072     oop obj = oopDesc::load_decode_heap_oop(p);
2073     HeapWord* addr = (HeapWord*)obj;
2074
2075     if (_cm->verbose_high()) {
2076       gclog_or_tty->print_cr("\t[0] we're looking at location "
2077                              "*"PTR_FORMAT" = "PTR_FORMAT,
2078                              p, (void*) obj);
2079     }
2080
2081     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2082       _cm->mark_and_count(obj);
2083       _cm->mark_stack_push(obj);
2084     }
2085   }
2086 };
2087
2088 class G1CMDrainMarkingStackClosure: public VoidClosure {
2089   ConcurrentMark*       _cm;
2090   CMMarkStack*          _markStack;
2091   G1CMKeepAliveClosure* _oopClosure;
2092  public:
2093   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2094                                G1CMKeepAliveClosure* oopClosure) :
2095     _cm(cm),
2096     _markStack(markStack),
2097     _oopClosure(oopClosure) { }
2098
2099   void do_void() {
2100     _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2101   }
2102 };
2103
2104 // 'Keep Alive' closure used by parallel reference processing.
2105 // An instance of this closure is used in the parallel reference processing
2106 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2107 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2108 // placed onto discovered ref lists only once, so we can mark and push with
2109 // no need to check whether the object has already been marked. Using the
2110 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2111 // operating on the global mark stack. This means that an individual
2112 // worker would be doing lock-free pushes while it processes its own
2113 // discovered ref list followed by a drain call. If the discovered ref lists
2114 // are unbalanced then this could cause interference with the other
2115 // workers. Using a CMTask (and its embedded local data structures)
2116 // avoids that potential interference.
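// Per worker, the resulting interleaving looks like this (pseudocode
// sketch mirroring the closure below; names are illustrative):
//
//   for each reference R on this worker's discovered list:
//     task->deal_with_reference(R);      // lock-free pushes to task-local data
//     if (--ref_counter == 0) {          // every G1RefProcDrainInterval refs
//       do {
//         task->do_marking_step(G1ConcMarkStepDurationMillis,
//                               false /* do_stealing */,
//                               false /* do_termination */);
//       } while (task->has_aborted() && !cm->has_overflown());
//       ref_counter = G1RefProcDrainInterval;  // reset and continue
//     }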
2117 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2118   ConcurrentMark*  _cm;
2119   CMTask*          _task;
2120   int              _ref_counter_limit;
2121   int              _ref_counter;
2122  public:
2123   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2124     _cm(cm), _task(task),
2125     _ref_counter_limit(G1RefProcDrainInterval) {
2126     assert(_ref_counter_limit > 0, "sanity");
2127     _ref_counter = _ref_counter_limit;
2128   }
2129
2130   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2131   virtual void do_oop(      oop* p) { do_oop_work(p); }
2132
2133   template <class T> void do_oop_work(T* p) {
2134     if (!_cm->has_overflown()) {
2135       oop obj = oopDesc::load_decode_heap_oop(p);
2136       if (_cm->verbose_high()) {
2137         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2138                                "*"PTR_FORMAT" = "PTR_FORMAT,
2139                                _task->task_id(), p, (void*) obj);
2140       }
2141
2142       _task->deal_with_reference(obj);
2143       _ref_counter--;
2144
2145       if (_ref_counter == 0) {
2146         // We have dealt with _ref_counter_limit references, pushing them and objects
2147         // reachable from them on to the local stack (and possibly the global stack).
2148         // Call do_marking_step() to process these entries. We call the routine in a
2149         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2150         // with the entries that we've pushed as a result of the deal_with_reference
2151         // calls above) or we overflow.
2152         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2153         // while there may still be some work to do. (See the comment at the
2154         // beginning of CMTask::do_marking_step() for those conditions - one of which
2155         // is reaching the specified time target.) It is only when
2156         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2157         // that the marking has completed.
2158         do {
2159           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2160           _task->do_marking_step(mark_step_duration_ms,
2161                                  false /* do_stealing    */,
2162                                  false /* do_termination */);
2163         } while (_task->has_aborted() && !_cm->has_overflown());
2164         _ref_counter = _ref_counter_limit;
2165       }
2166     } else {
2167       if (_cm->verbose_high()) {
2168          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2169       }
2170     }
2171   }
2172 };
2173
2174 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2175   ConcurrentMark* _cm;
2176   CMTask* _task;
2177  public:
2178   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2179     _cm(cm), _task(task) { }
2180
2181   void do_void() {
2182     do {
2183       if (_cm->verbose_high()) {
2184         gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step",
2185                                _task->task_id());
2186       }
2187
2188       // We call CMTask::do_marking_step() to completely drain the local and
2189       // global marking stacks. The routine is called in a loop, which we'll
2190       // exit if there's nothing more to do (i.e. we've completely drained the
2191       // entries that were pushed as a result of applying the
2192       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2193       // lists above) or we overflow the global marking stack.
2194       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2195       // while there may still be some work to do. (See the comment at the
2196       // beginning of CMTask::do_marking_step() for those conditions - one of which
2197       // is reaching the specified time target.) It is only when
2198       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2199       // that the marking has completed.
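      // Loop-exit summary (sketch):
      //
      //   has_aborted() | has_overflown() | outcome
      //   --------------+-----------------+--------------------------------
      //   false         | -               | stacks fully drained; finished
      //   true          | false           | transient abort; retry the step
      //   true          | true            | global mark stack overflowed;
      //                 |                 | give up; marking will restart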
2200 2201 _task->do_marking_step(1000000000.0 /* something very large */, 2202 true /* do_stealing */, 2203 true /* do_termination */); 2204 } while (_task->has_aborted() && !_cm->has_overflown()); 2205 } 2206 }; 2207 2208 // Implementation of AbstractRefProcTaskExecutor for parallel 2209 // reference processing at the end of G1 concurrent marking 2210 2211 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2212 private: 2213 G1CollectedHeap* _g1h; 2214 ConcurrentMark* _cm; 2215 WorkGang* _workers; 2216 int _active_workers; 2217 2218 public: 2219 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2220 ConcurrentMark* cm, 2221 WorkGang* workers, 2222 int n_workers) : 2223 _g1h(g1h), _cm(cm), 2224 _workers(workers), _active_workers(n_workers) { } 2225 2226 // Executes the given task using concurrent marking worker threads. 2227 virtual void execute(ProcessTask& task); 2228 virtual void execute(EnqueueTask& task); 2229 }; 2230 2231 class G1CMRefProcTaskProxy: public AbstractGangTask { 2232 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2233 ProcessTask& _proc_task; 2234 G1CollectedHeap* _g1h; 2235 ConcurrentMark* _cm; 2236 2237 public: 2238 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2239 G1CollectedHeap* g1h, 2240 ConcurrentMark* cm) : 2241 AbstractGangTask("Process reference objects in parallel"), 2242 _proc_task(proc_task), _g1h(g1h), _cm(cm) { } 2243 2244 virtual void work(uint worker_id) { 2245 CMTask* marking_task = _cm->task(worker_id); 2246 G1CMIsAliveClosure g1_is_alive(_g1h); 2247 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task); 2248 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2249 2250 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2251 } 2252 }; 2253 2254 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2255 assert(_workers != NULL, "Need parallel worker threads."); 2256 2257 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2258 2259 // We need to reset the phase for each task execution so that 2260 // the termination protocol of CMTask::do_marking_step works. 2261 _cm->set_phase(_active_workers, false /* concurrent */); 2262 _g1h->set_par_threads(_active_workers); 2263 _workers->run_task(&proc_task_proxy); 2264 _g1h->set_par_threads(0); 2265 } 2266 2267 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2268 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2269 EnqueueTask& _enq_task; 2270 2271 public: 2272 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2273 AbstractGangTask("Enqueue reference objects in parallel"), 2274 _enq_task(enq_task) { } 2275 2276 virtual void work(uint worker_id) { 2277 _enq_task.work(worker_id); 2278 } 2279 }; 2280 2281 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2282 assert(_workers != NULL, "Need parallel worker threads."); 2283 2284 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2285 2286 _g1h->set_par_threads(_active_workers); 2287 _workers->run_task(&enq_task_proxy); 2288 _g1h->set_par_threads(0); 2289 } 2290 2291 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2292 ResourceMark rm; 2293 HandleMark hm; 2294 2295 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2296 2297 // Is alive closure. 2298 G1CMIsAliveClosure g1_is_alive(g1h); 2299 2300 // Inner scope to exclude the cleaning of the string and symbol 2301 // tables from the displayed time. 
2302   {
2303     if (G1Log::finer()) {
2304       gclog_or_tty->put(' ');
2305     }
2306     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2307
2308     ReferenceProcessor* rp = g1h->ref_processor_cm();
2309
2310     // See the comment in G1CollectedHeap::ref_processing_init()
2311     // about how reference processing currently works in G1.
2312
2313     // Process weak references.
2314     rp->setup_policy(clear_all_soft_refs);
2315     assert(_markStack.isEmpty(), "mark stack should be empty");
2316
2317     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2318     G1CMDrainMarkingStackClosure
2319       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2320
2321     // We use the work gang from the G1CollectedHeap and we utilize all
2322     // the worker threads.
2323     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2324     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2325
2326     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2327                                               g1h->workers(), active_workers);
2328
2329     ReferenceProcessorStats stats;
2330     if (rp->processing_is_mt()) {
2331       // Set the degree of MT here. If the discovery is done MT, there
2332       // may have been a different number of threads doing the discovery
2333       // and a different number of discovered lists may have Ref objects.
2334       // That is OK as long as the Reference lists are balanced (see
2335       // balance_all_queues() and balance_queues()).
2336       rp->set_active_mt_degree(active_workers);
2337
2338       stats = rp->process_discovered_references(&g1_is_alive,
2339                                                 &g1_keep_alive,
2340                                                 &g1_drain_mark_stack,
2341                                                 &par_task_executor,
2342                                                 g1h->gc_timer_cm());
2343
2344       // The work routines of the parallel keep_alive and drain_marking_stack
2345       // will set the has_overflown flag if we overflow the global marking
2346       // stack.
2347     } else {
2348       stats = rp->process_discovered_references(&g1_is_alive,
2349                                                 &g1_keep_alive,
2350                                                 &g1_drain_mark_stack,
2351                                                 NULL,
2352                                                 g1h->gc_timer_cm());
2353     }
2354
2355     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2356
2357     assert(_markStack.overflow() || _markStack.isEmpty(),
2358            "mark stack should be empty (unless it overflowed)");
2359     if (_markStack.overflow()) {
2360       // Should have been done already when we tried to push an
2361       // entry on to the global mark stack. But let's do it again.
2362       set_has_overflown();
2363     }
2364
2365     if (rp->processing_is_mt()) {
2366       assert(rp->num_q() == active_workers, "why not");
2367       rp->enqueue_discovered_references(&par_task_executor);
2368     } else {
2369       rp->enqueue_discovered_references();
2370     }
2371
2372     rp->verify_no_references_recorded();
2373     assert(!rp->discovery_enabled(), "Post condition");
2374   }
2375
2376   // Now clean up stale oops in StringTable
2377   StringTable::unlink(&g1_is_alive);
2378   // Clean up unreferenced symbols in symbol table.
2379   SymbolTable::unlink();
2380 }
2381
2382 void ConcurrentMark::swapMarkBitMaps() {
2383   CMBitMapRO* temp = _prevMarkBitMap;
2384   _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2385   _nextMarkBitMap = (CMBitMap*) temp;
2386 }
2387
2388 class CMRemarkTask: public AbstractGangTask {
2389  private:
2390   ConcurrentMark *_cm;
2391
2392  public:
2393   void work(uint worker_id) {
2394     // Since all available tasks are actually started, we should
2395     // only proceed if we're supposed to be active.
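    // For example, if the gang started 8 workers but active_tasks() is 4,
    // workers 4..7 fail the check below and return immediately.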
2396     if (worker_id < _cm->active_tasks()) {
2397       CMTask* task = _cm->task(worker_id);
2398       task->record_start_time();
2399       do {
2400         task->do_marking_step(1000000000.0 /* something very large */,
2401                               true /* do_stealing    */,
2402                               true /* do_termination */);
2403       } while (task->has_aborted() && !_cm->has_overflown());
2404       // If we overflow, then we do not want to restart. We instead
2405       // want to abort remark and do concurrent marking again.
2406       task->record_end_time();
2407     }
2408   }
2409
2410   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2411     AbstractGangTask("Par Remark"), _cm(cm) {
2412     _cm->terminator()->reset_for_reuse(active_workers);
2413   }
2414 };
2415
2416 void ConcurrentMark::checkpointRootsFinalWork() {
2417   ResourceMark rm;
2418   HandleMark   hm;
2419   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2420
2421   g1h->ensure_parsability(false);
2422
2423   if (G1CollectedHeap::use_parallel_gc_threads()) {
2424     G1CollectedHeap::StrongRootsScope srs(g1h);
2425     // this is remark, so we'll use up all active threads
2426     uint active_workers = g1h->workers()->active_workers();
2427     if (active_workers == 0) {
2428       assert(active_workers > 0, "Should have been set earlier");
2429       active_workers = (uint) ParallelGCThreads;
2430       g1h->workers()->set_active_workers(active_workers);
2431     }
2432     set_phase(active_workers, false /* concurrent */);
2433     // Leave _parallel_marking_threads at its
2434     // value originally calculated in the ConcurrentMark
2435     // constructor and pass values of the active workers
2436     // through the gang in the task.
2437
2438     CMRemarkTask remarkTask(this, active_workers);
2439     g1h->set_par_threads(active_workers);
2440     g1h->workers()->run_task(&remarkTask);
2441     g1h->set_par_threads(0);
2442   } else {
2443     G1CollectedHeap::StrongRootsScope srs(g1h);
2444     // this is remark, so we'll use up all available threads
2445     uint active_workers = 1;
2446     set_phase(active_workers, false /* concurrent */);
2447
2448     CMRemarkTask remarkTask(this, active_workers);
2449     // We will start all available threads, even if we decide that the
2450     // active_workers will be fewer. The extra ones will just bail out
2451     // immediately.
2452 remarkTask.work(0); 2453 } 2454 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2455 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2456 2457 print_stats(); 2458 2459 #if VERIFY_OBJS_PROCESSED 2460 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2461 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2462 _scan_obj_cl.objs_processed, 2463 ThreadLocalObjQueue::objs_enqueued); 2464 guarantee(_scan_obj_cl.objs_processed == 2465 ThreadLocalObjQueue::objs_enqueued, 2466 "Different number of objs processed and enqueued."); 2467 } 2468 #endif 2469 } 2470 2471 #ifndef PRODUCT 2472 2473 class PrintReachableOopClosure: public OopClosure { 2474 private: 2475 G1CollectedHeap* _g1h; 2476 outputStream* _out; 2477 VerifyOption _vo; 2478 bool _all; 2479 2480 public: 2481 PrintReachableOopClosure(outputStream* out, 2482 VerifyOption vo, 2483 bool all) : 2484 _g1h(G1CollectedHeap::heap()), 2485 _out(out), _vo(vo), _all(all) { } 2486 2487 void do_oop(narrowOop* p) { do_oop_work(p); } 2488 void do_oop( oop* p) { do_oop_work(p); } 2489 2490 template <class T> void do_oop_work(T* p) { 2491 oop obj = oopDesc::load_decode_heap_oop(p); 2492 const char* str = NULL; 2493 const char* str2 = ""; 2494 2495 if (obj == NULL) { 2496 str = ""; 2497 } else if (!_g1h->is_in_g1_reserved(obj)) { 2498 str = " O"; 2499 } else { 2500 HeapRegion* hr = _g1h->heap_region_containing(obj); 2501 guarantee(hr != NULL, "invariant"); 2502 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2503 bool marked = _g1h->is_marked(obj, _vo); 2504 2505 if (over_tams) { 2506 str = " >"; 2507 if (marked) { 2508 str2 = " AND MARKED"; 2509 } 2510 } else if (marked) { 2511 str = " M"; 2512 } else { 2513 str = " NOT"; 2514 } 2515 } 2516 2517 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2518 p, (void*) obj, str, str2); 2519 } 2520 }; 2521 2522 class PrintReachableObjectClosure : public ObjectClosure { 2523 private: 2524 G1CollectedHeap* _g1h; 2525 outputStream* _out; 2526 VerifyOption _vo; 2527 bool _all; 2528 HeapRegion* _hr; 2529 2530 public: 2531 PrintReachableObjectClosure(outputStream* out, 2532 VerifyOption vo, 2533 bool all, 2534 HeapRegion* hr) : 2535 _g1h(G1CollectedHeap::heap()), 2536 _out(out), _vo(vo), _all(all), _hr(hr) { } 2537 2538 void do_object(oop o) { 2539 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2540 bool marked = _g1h->is_marked(o, _vo); 2541 bool print_it = _all || over_tams || marked; 2542 2543 if (print_it) { 2544 _out->print_cr(" "PTR_FORMAT"%s", 2545 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2546 PrintReachableOopClosure oopCl(_out, _vo, _all); 2547 o->oop_iterate(&oopCl); 2548 } 2549 } 2550 }; 2551 2552 class PrintReachableRegionClosure : public HeapRegionClosure { 2553 private: 2554 G1CollectedHeap* _g1h; 2555 outputStream* _out; 2556 VerifyOption _vo; 2557 bool _all; 2558 2559 public: 2560 bool doHeapRegion(HeapRegion* hr) { 2561 HeapWord* b = hr->bottom(); 2562 HeapWord* e = hr->end(); 2563 HeapWord* t = hr->top(); 2564 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2565 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2566 "TAMS: "PTR_FORMAT, b, e, t, p); 2567 _out->cr(); 2568 2569 HeapWord* from = b; 2570 HeapWord* to = t; 2571 2572 if (to > from) { 2573 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2574 _out->cr(); 2575 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2576 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2577 _out->cr(); 2578 } 2579 2580 return false; 2581 } 2582 2583 PrintReachableRegionClosure(outputStream* out, 2584 VerifyOption vo, 2585 bool all) : 2586 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2587 }; 2588 2589 void ConcurrentMark::print_reachable(const char* str, 2590 VerifyOption vo, 2591 bool all) { 2592 gclog_or_tty->cr(); 2593 gclog_or_tty->print_cr("== Doing heap dump... "); 2594 2595 if (G1PrintReachableBaseFile == NULL) { 2596 gclog_or_tty->print_cr(" #### error: no base file defined"); 2597 return; 2598 } 2599 2600 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2601 (JVM_MAXPATHLEN - 1)) { 2602 gclog_or_tty->print_cr(" #### error: file name too long"); 2603 return; 2604 } 2605 2606 char file_name[JVM_MAXPATHLEN]; 2607 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2608 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2609 2610 fileStream fout(file_name); 2611 if (!fout.is_open()) { 2612 gclog_or_tty->print_cr(" #### error: could not open file"); 2613 return; 2614 } 2615 2616 outputStream* out = &fout; 2617 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2618 out->cr(); 2619 2620 out->print_cr("--- ITERATING OVER REGIONS"); 2621 out->cr(); 2622 PrintReachableRegionClosure rcl(out, vo, all); 2623 _g1h->heap_region_iterate(&rcl); 2624 out->cr(); 2625 2626 gclog_or_tty->print_cr(" done"); 2627 gclog_or_tty->flush(); 2628 } 2629 2630 #endif // PRODUCT 2631 2632 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2633 // Note we are overriding the read-only view of the prev map here, via 2634 // the cast. 2635 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2636 } 2637 2638 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2639 _nextMarkBitMap->clearRange(mr); 2640 } 2641 2642 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2643 clearRangePrevBitmap(mr); 2644 clearRangeNextBitmap(mr); 2645 } 2646 2647 HeapRegion* 2648 ConcurrentMark::claim_region(int task_num) { 2649 // "checkpoint" the finger 2650 HeapWord* finger = _finger; 2651 2652 // _heap_end will not change underneath our feet; it only changes at 2653 // yield points. 2654 while (finger < _heap_end) { 2655 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2656 2657 // Note on how this code handles humongous regions. In the 2658 // normal case the finger will reach the start of a "starts 2659 // humongous" (SH) region. Its end will either be the end of the 2660 // last "continues humongous" (CH) region in the sequence, or the 2661 // standard end of the SH region (if the SH is the only region in 2662 // the sequence). 
That way claim_region() will skip over the CH
2663     // regions. However, there is a subtle race between a CM thread
2664     // executing this method and a mutator thread doing a humongous
2665     // object allocation. The two are not mutually exclusive as the CM
2666     // thread does not need to hold the Heap_lock when it gets
2667     // here. So there is a chance that claim_region() will come across
2668     // a free region that's in the process of becoming a SH or a CH
2669     // region. In the former case, it will either
2670     //   a) miss the update to the region's end, in which case it will
2671     //      visit every subsequent CH region, will find their bitmaps
2672     //      empty, and do nothing, or
2673     //   b) observe the update of the region's end (in which case
2674     //      it will skip the subsequent CH regions).
2675     // If it comes across a region that suddenly becomes CH, the
2676     // scenario will be similar to b). So, the race between
2677     // claim_region() and a humongous object allocation might force us
2678     // to do a bit of unnecessary work (due to some unnecessary bitmap
2679     // iterations) but it should not introduce any correctness issues.
2680     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2681     HeapWord*   bottom        = curr_region->bottom();
2682     HeapWord*   end           = curr_region->end();
2683     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2684
2685     if (verbose_low()) {
2686       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2687                              "["PTR_FORMAT", "PTR_FORMAT"), "
2688                              "limit = "PTR_FORMAT,
2689                              task_num, curr_region, bottom, end, limit);
2690     }
2691
2692     // Is the gap between reading the finger and doing the CAS too long?
2693     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2694     if (res == finger) {
2695       // we succeeded
2696
2697       // notice that _finger == end cannot be guaranteed here since
2698       // someone else might have moved the finger even further
2699       assert(_finger >= end, "the finger should have moved forward");
2700
2701       if (verbose_low()) {
2702         gclog_or_tty->print_cr("[%d] we were successful with region = "
2703                                PTR_FORMAT, task_num, curr_region);
2704       }
2705
2706       if (limit > bottom) {
2707         if (verbose_low()) {
2708           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2709                                  "returning it ", task_num, curr_region);
2710         }
2711         return curr_region;
2712       } else {
2713         assert(limit == bottom,
2714                "the region limit should be at bottom");
2715         if (verbose_low()) {
2716           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2717                                  "returning NULL", task_num, curr_region);
2718         }
2719         // we return NULL and the caller should try calling
2720         // claim_region() again.
2721 return NULL; 2722 } 2723 } else { 2724 assert(_finger > finger, "the finger should have moved forward"); 2725 if (verbose_low()) { 2726 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2727 "global finger = "PTR_FORMAT", " 2728 "our finger = "PTR_FORMAT, 2729 task_num, _finger, finger); 2730 } 2731 2732 // read it again 2733 finger = _finger; 2734 } 2735 } 2736 2737 return NULL; 2738 } 2739 2740 #ifndef PRODUCT 2741 enum VerifyNoCSetOopsPhase { 2742 VerifyNoCSetOopsStack, 2743 VerifyNoCSetOopsQueues, 2744 VerifyNoCSetOopsSATBCompleted, 2745 VerifyNoCSetOopsSATBThread 2746 }; 2747 2748 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2749 private: 2750 G1CollectedHeap* _g1h; 2751 VerifyNoCSetOopsPhase _phase; 2752 int _info; 2753 2754 const char* phase_str() { 2755 switch (_phase) { 2756 case VerifyNoCSetOopsStack: return "Stack"; 2757 case VerifyNoCSetOopsQueues: return "Queue"; 2758 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2759 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2760 default: ShouldNotReachHere(); 2761 } 2762 return NULL; 2763 } 2764 2765 void do_object_work(oop obj) { 2766 guarantee(!_g1h->obj_in_cs(obj), 2767 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2768 (void*) obj, phase_str(), _info)); 2769 } 2770 2771 public: 2772 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2773 2774 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2775 _phase = phase; 2776 _info = info; 2777 } 2778 2779 virtual void do_oop(oop* p) { 2780 oop obj = oopDesc::load_decode_heap_oop(p); 2781 do_object_work(obj); 2782 } 2783 2784 virtual void do_oop(narrowOop* p) { 2785 // We should not come across narrow oops while scanning marking 2786 // stacks and SATB buffers. 2787 ShouldNotReachHere(); 2788 } 2789 2790 virtual void do_object(oop obj) { 2791 do_object_work(obj); 2792 } 2793 }; 2794 2795 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2796 bool verify_enqueued_buffers, 2797 bool verify_thread_buffers, 2798 bool verify_fingers) { 2799 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2800 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2801 return; 2802 } 2803 2804 VerifyNoCSetOopsClosure cl; 2805 2806 if (verify_stacks) { 2807 // Verify entries on the global mark stack 2808 cl.set_phase(VerifyNoCSetOopsStack); 2809 _markStack.oops_do(&cl); 2810 2811 // Verify entries on the task queues 2812 for (int i = 0; i < (int) _max_task_num; i += 1) { 2813 cl.set_phase(VerifyNoCSetOopsQueues, i); 2814 OopTaskQueue* queue = _task_queues->queue(i); 2815 queue->oops_do(&cl); 2816 } 2817 } 2818 2819 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2820 2821 // Verify entries on the enqueued SATB buffers 2822 if (verify_enqueued_buffers) { 2823 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2824 satb_qs.iterate_completed_buffers_read_only(&cl); 2825 } 2826 2827 // Verify entries on the per-thread SATB buffers 2828 if (verify_thread_buffers) { 2829 cl.set_phase(VerifyNoCSetOopsSATBThread); 2830 satb_qs.iterate_thread_buffers_read_only(&cl); 2831 } 2832 2833 if (verify_fingers) { 2834 // Verify the global finger 2835 HeapWord* global_finger = finger(); 2836 if (global_finger != NULL && global_finger < _heap_end) { 2837 // The global finger always points to a heap region boundary. 
We 2838 // use heap_region_containing_raw() to get the containing region 2839 // given that the global finger could be pointing to a free region 2840 // which subsequently becomes continues humongous. If that 2841 // happens, heap_region_containing() will return the bottom of the 2842 // corresponding starts humongous region and the check below will 2843 // not hold any more. 2844 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2845 guarantee(global_finger == global_hr->bottom(), 2846 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2847 global_finger, HR_FORMAT_PARAMS(global_hr))); 2848 } 2849 2850 // Verify the task fingers 2851 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2852 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2853 CMTask* task = _tasks[i]; 2854 HeapWord* task_finger = task->finger(); 2855 if (task_finger != NULL && task_finger < _heap_end) { 2856 // See above note on the global finger verification. 2857 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2858 guarantee(task_finger == task_hr->bottom() || 2859 !task_hr->in_collection_set(), 2860 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2861 task_finger, HR_FORMAT_PARAMS(task_hr))); 2862 } 2863 } 2864 } 2865 } 2866 #endif // PRODUCT 2867 2868 // Aggregate the counting data that was constructed concurrently 2869 // with marking. 2870 class AggregateCountDataHRClosure: public HeapRegionClosure { 2871 G1CollectedHeap* _g1h; 2872 ConcurrentMark* _cm; 2873 CardTableModRefBS* _ct_bs; 2874 BitMap* _cm_card_bm; 2875 size_t _max_task_num; 2876 2877 public: 2878 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2879 BitMap* cm_card_bm, 2880 size_t max_task_num) : 2881 _g1h(g1h), _cm(g1h->concurrent_mark()), 2882 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 2883 _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { } 2884 2885 bool doHeapRegion(HeapRegion* hr) { 2886 if (hr->continuesHumongous()) { 2887 // We will ignore these here and process them when their 2888 // associated "starts humongous" region is processed. 2889 // Note that we cannot rely on their associated 2890 // "starts humongous" region to have their bit set to 1 2891 // since, due to the region chunking in the parallel region 2892 // iteration, a "continues humongous" region might be visited 2893 // before its associated "starts humongous". 2894 return false; 2895 } 2896 2897 HeapWord* start = hr->bottom(); 2898 HeapWord* limit = hr->next_top_at_mark_start(); 2899 HeapWord* end = hr->end(); 2900 2901 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2902 err_msg("Preconditions not met - " 2903 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2904 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2905 start, limit, hr->top(), hr->end())); 2906 2907 assert(hr->next_marked_bytes() == 0, "Precondition"); 2908 2909 if (start == limit) { 2910 // NTAMS of this region has not been set so nothing to do. 2911 return false; 2912 } 2913 2914 // 'start' should be in the heap. 
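    // Alignment sketch (illustrative; cards are 512 bytes, so the card
    // shift is 9): region bottoms are card aligned, hence 'start' maps
    // cleanly to a card index; 'limit' (ntams) may fall mid-card, which
    // is why limit_idx is bumped further down; 'end' is card aligned
    // except possibly for the last region, where it can lie just past
    // the reserved heap.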
2915     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2916     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2917     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2918
2919     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2920     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2921     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2922
2923     // If ntams is not card aligned then we bump the card bitmap index
2924     // for limit so that we get all the cards spanned by
2925     // the object ending at ntams.
2926     // Note: if this is the last region in the heap then ntams
2927     // could be actually just beyond the end of the heap;
2928     // limit_idx will then correspond to a (non-existent) card
2929     // that is also outside the heap.
2930     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2931       limit_idx += 1;
2932     }
2933
2934     assert(limit_idx <= end_idx, "or else use atomics");
2935
2936     // Aggregate the "stripe" in the count data associated with hr.
2937     uint hrs_index = hr->hrs_index();
2938     size_t marked_bytes = 0;
2939
2940     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2941       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2942       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2943
2944       // Fetch the marked_bytes in this region for task i and
2945       // add it to the running total for this region.
2946       marked_bytes += marked_bytes_array[hrs_index];
2947
2948       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2949       // into the global card bitmap.
2950       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2951
2952       while (scan_idx < limit_idx) {
2953         assert(task_card_bm->at(scan_idx) == true, "should be");
2954         _cm_card_bm->set_bit(scan_idx);
2955         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2956
2957         // BitMap::get_next_one_offset() can handle the case when
2958         // its left_offset parameter is greater than its right_offset
2959         // parameter. It does, however, have an early exit if
2960         // left_offset == right_offset. So let's limit the value
2961         // passed in for left offset here.
2962         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2963         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2964       }
2965     }
2966
2967     // Update the marked bytes for this region.
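    // For example (illustrative): with two marking tasks that recorded
    // 1024 and 3072 live bytes for this region in their
    // marked_bytes_array slots, the loop above leaves
    // marked_bytes == 4096, which is attributed to hr below.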
2968 hr->add_to_marked_bytes(marked_bytes); 2969 2970 // Next heap region 2971 return false; 2972 } 2973 }; 2974 2975 class G1AggregateCountDataTask: public AbstractGangTask { 2976 protected: 2977 G1CollectedHeap* _g1h; 2978 ConcurrentMark* _cm; 2979 BitMap* _cm_card_bm; 2980 size_t _max_task_num; 2981 int _active_workers; 2982 2983 public: 2984 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2985 ConcurrentMark* cm, 2986 BitMap* cm_card_bm, 2987 size_t max_task_num, 2988 int n_workers) : 2989 AbstractGangTask("Count Aggregation"), 2990 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2991 _max_task_num(max_task_num), 2992 _active_workers(n_workers) { } 2993 2994 void work(uint worker_id) { 2995 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num); 2996 2997 if (G1CollectedHeap::use_parallel_gc_threads()) { 2998 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 2999 _active_workers, 3000 HeapRegion::AggregateCountClaimValue); 3001 } else { 3002 _g1h->heap_region_iterate(&cl); 3003 } 3004 } 3005 }; 3006 3007 3008 void ConcurrentMark::aggregate_count_data() { 3009 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3010 _g1h->workers()->active_workers() : 3011 1); 3012 3013 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3014 _max_task_num, n_workers); 3015 3016 if (G1CollectedHeap::use_parallel_gc_threads()) { 3017 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3018 "sanity check"); 3019 _g1h->set_par_threads(n_workers); 3020 _g1h->workers()->run_task(&g1_par_agg_task); 3021 _g1h->set_par_threads(0); 3022 3023 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3024 "sanity check"); 3025 _g1h->reset_heap_region_claim_values(); 3026 } else { 3027 g1_par_agg_task.work(0); 3028 } 3029 } 3030 3031 // Clear the per-worker arrays used to store the per-region counting data 3032 void ConcurrentMark::clear_all_count_data() { 3033 // Clear the global card bitmap - it will be filled during 3034 // liveness count aggregation (during remark) and the 3035 // final counting task. 3036 _card_bm.clear(); 3037 3038 // Clear the global region bitmap - it will be filled as part 3039 // of the final counting task. 
3040   _region_bm.clear();
3041
3042   uint max_regions = _g1h->max_regions();
3043   assert(_max_task_num != 0, "uninitialized");
3044
3045   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3046     BitMap* task_card_bm = count_card_bitmap_for(i);
3047     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3048
3049     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3050     assert(marked_bytes_array != NULL, "uninitialized");
3051
3052     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3053     task_card_bm->clear();
3054   }
3055 }
3056
3057 void ConcurrentMark::print_stats() {
3058   if (verbose_stats()) {
3059     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3060     for (size_t i = 0; i < _active_tasks; ++i) {
3061       _tasks[i]->print_stats();
3062       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3063     }
3064   }
3065 }
3066
3067 // Abandon the current marking iteration due to a Full GC.
3068 void ConcurrentMark::abort() {
3069   // Clear all marks to force the marking thread to do nothing.
3070   _nextMarkBitMap->clearAll();
3071   // Clear the liveness counting data.
3072   clear_all_count_data();
3073   // Empty the mark stack.
3074   reset_marking_state();
3075   for (int i = 0; i < (int)_max_task_num; ++i) {
3076     _tasks[i]->clear_region_fields();
3077   }
3078   _has_aborted = true;
3079
3080   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3081   satb_mq_set.abandon_partial_marking();
3082   // This can be called either during or outside marking; we'll read
3083   // the expected_active value from the SATB queue set.
3084   satb_mq_set.set_active_all_threads(
3085                                  false, /* new active value */
3086                                  satb_mq_set.is_active() /* expected_active */);
3087
3088   _g1h->trace_heap_after_concurrent_cycle();
3089   _g1h->register_concurrent_cycle_end();
3090 }
3091
3092 static void print_ms_time_info(const char* prefix, const char* name,
3093                                NumberSeq& ns) {
3094   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3095                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3096   if (ns.num() > 0) {
3097     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3098                            prefix, ns.sd(), ns.maximum());
3099   }
3100 }
3101
3102 void ConcurrentMark::print_summary_info() {
3103   gclog_or_tty->print_cr(" Concurrent marking:");
3104   print_ms_time_info("  ", "init marks", _init_times);
3105   print_ms_time_info("  ", "remarks", _remark_times);
3106   {
3107     print_ms_time_info("     ", "final marks", _remark_mark_times);
3108     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3109
3110   }
3111   print_ms_time_info("  ", "cleanups", _cleanup_times);
3112   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3113                          _total_counting_time,
3114                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3115                           (double)_cleanup_times.num()
3116                          : 0.0));
3117   if (G1ScrubRemSets) {
3118     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3119                            _total_rs_scrub_time,
3120                            (_cleanup_times.num() > 0 ?
_total_rs_scrub_time * 1000.0 / 3121 (double)_cleanup_times.num() 3122 : 0.0)); 3123 } 3124 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3125 (_init_times.sum() + _remark_times.sum() + 3126 _cleanup_times.sum())/1000.0); 3127 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3128 "(%8.2f s marking).", 3129 cmThread()->vtime_accum(), 3130 cmThread()->vtime_mark_accum()); 3131 } 3132 3133 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3134 if (use_parallel_marking_threads()) { 3135 _parallel_workers->print_worker_threads_on(st); 3136 } 3137 } 3138 3139 // We take a break if someone is trying to stop the world. 3140 bool ConcurrentMark::do_yield_check(uint worker_id) { 3141 if (should_yield()) { 3142 if (worker_id == 0) { 3143 _g1h->g1_policy()->record_concurrent_pause(); 3144 } 3145 cmThread()->yield(); 3146 return true; 3147 } else { 3148 return false; 3149 } 3150 } 3151 3152 bool ConcurrentMark::should_yield() { 3153 return cmThread()->should_yield(); 3154 } 3155 3156 bool ConcurrentMark::containing_card_is_marked(void* p) { 3157 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3158 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3159 } 3160 3161 bool ConcurrentMark::containing_cards_are_marked(void* start, 3162 void* last) { 3163 return containing_card_is_marked(start) && 3164 containing_card_is_marked(last); 3165 } 3166 3167 #ifndef PRODUCT 3168 // for debugging purposes 3169 void ConcurrentMark::print_finger() { 3170 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3171 _heap_start, _heap_end, _finger); 3172 for (int i = 0; i < (int) _max_task_num; ++i) { 3173 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3174 } 3175 gclog_or_tty->print_cr(""); 3176 } 3177 #endif 3178 3179 void CMTask::scan_object(oop obj) { 3180 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3181 3182 if (_cm->verbose_high()) { 3183 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3184 _task_id, (void*) obj); 3185 } 3186 3187 size_t obj_size = obj->size(); 3188 _words_scanned += obj_size; 3189 3190 obj->oop_iterate(_cm_oop_closure); 3191 statsOnly( ++_objs_scanned ); 3192 check_limits(); 3193 } 3194 3195 // Closure for iteration over bitmaps 3196 class CMBitMapClosure : public BitMapClosure { 3197 private: 3198 // the bitmap that is being iterated over 3199 CMBitMap* _nextMarkBitMap; 3200 ConcurrentMark* _cm; 3201 CMTask* _task; 3202 3203 public: 3204 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3205 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3206 3207 bool do_bit(size_t offset) { 3208 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3209 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3210 assert( addr < _cm->finger(), "invariant"); 3211 3212 statsOnly( _task->increase_objs_found_on_bitmap() ); 3213 assert(addr >= _task->finger(), "invariant"); 3214 3215 // We move that task's local finger along. 3216 _task->move_finger_to(addr); 3217 3218 _task->scan_object(oop(addr)); 3219 // we only partially drain the local queue and global stack 3220 _task->drain_local_queue(true); 3221 _task->drain_global_stack(true); 3222 3223 // if the has_aborted flag has been raised, we need to bail out of 3224 // the iteration 3225 return !_task->has_aborted(); 3226 } 3227 }; 3228 3229 // Closure for iterating over objects, currently only used for 3230 // processing SATB buffers. 
3231 class CMObjectClosure : public ObjectClosure {
3232  private:
3233   CMTask* _task;
3234
3235  public:
3236   void do_object(oop obj) {
3237     _task->deal_with_reference(obj);
3238   }
3239
3240   CMObjectClosure(CMTask* task) : _task(task) { }
3241 };
3242
3243 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3244                                ConcurrentMark* cm,
3245                                CMTask* task)
3246   : _g1h(g1h), _cm(cm), _task(task) {
3247   assert(_ref_processor == NULL, "should be initialized to NULL");
3248
3249   if (G1UseConcMarkReferenceProcessing) {
3250     _ref_processor = g1h->ref_processor_cm();
3251     assert(_ref_processor != NULL, "should not be NULL");
3252   }
3253 }
3254
3255 void CMTask::setup_for_region(HeapRegion* hr) {
3256   // Separated the asserts so that we know which one fires.
3257   assert(hr != NULL,
3258         "claim_region() should have filtered out continues humongous regions");
3259   assert(!hr->continuesHumongous(),
3260         "claim_region() should have filtered out continues humongous regions");
3261
3262   if (_cm->verbose_low()) {
3263     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3264                            _task_id, hr);
3265   }
3266
3267   _curr_region  = hr;
3268   _finger       = hr->bottom();
3269   update_region_limit();
3270 }
3271
3272 void CMTask::update_region_limit() {
3273   HeapRegion* hr            = _curr_region;
3274   HeapWord* bottom          = hr->bottom();
3275   HeapWord* limit           = hr->next_top_at_mark_start();
3276
3277   if (limit == bottom) {
3278     if (_cm->verbose_low()) {
3279       gclog_or_tty->print_cr("[%d] found an empty region "
3280                              "["PTR_FORMAT", "PTR_FORMAT")",
3281                              _task_id, bottom, limit);
3282     }
3283     // The region was collected underneath our feet.
3284     // We set the finger to bottom to ensure that the bitmap
3285     // iteration that will follow this will not do anything.
3286     // (this is not a condition that holds when we set the region up,
3287     // as the region is not supposed to be empty in the first place)
3288     _finger = bottom;
3289   } else if (limit >= _region_limit) {
3290     assert(limit >= _finger, "peace of mind");
3291   } else {
3292     assert(limit < _region_limit, "only way to get here");
3293     // This can happen under some pretty unusual circumstances. An
3294     // evacuation pause empties the region underneath our feet (NTAMS
3295     // at bottom). We then do some allocation in the region (NTAMS
3296     // stays at bottom), followed by the region being used as a GC
3297     // alloc region (NTAMS will move to top() and the objects
3298     // originally below it will be grayed). All objects now marked in
3299     // the region are explicitly grayed, if below the global finger,
3300     // and in fact we do not need to scan anything else. So, we simply
3301     // set _finger to be limit to ensure that the bitmap iteration
3302     // doesn't do anything.
3303     _finger = limit;
3304   }
3305
3306   _region_limit = limit;
3307 }
3308
3309 void CMTask::giveup_current_region() {
3310   assert(_curr_region != NULL, "invariant");
3311   if (_cm->verbose_low()) {
3312     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3313                            _task_id, _curr_region);
3314   }
3315   clear_region_fields();
3316 }
3317
3318 void CMTask::clear_region_fields() {
3319   // Values for these three fields that indicate that we're not
3320   // holding on to a region.
void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region  = NULL;
  _finger       = NULL;
  _region_limit = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] resetting", _task_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes            = 0;
  _local_pops              = 0;
  _local_max_size          = 0;
  _objs_scanned            = 0;
  _global_pushes           = 0;
  _global_pops             = 0;
  _global_max_size         = 0;
  _global_transfers_to     = 0;
  _global_transfers_from   = 0;
  _regions_claimed         = 0;
  _objs_found_on_bitmap    = 0;
  _satb_buffers_processed  = 0;
  _steal_attempts          = 0;
  _steals                  = 0;
  _aborted                 = 0;
  _aborted_overflow        = 0;
  _aborted_cm_aborted      = 0;
  _aborted_yield           = 0;
  _aborted_timed_out       = 0;
  _aborted_satb            = 0;
  _aborted_termination     = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  regular_clock_call();
}
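
// The work-based clock scheme in a nutshell: scan_object() adds each
// object's word count to _words_scanned, the marking oop closure bumps
// _refs_reached, and check_limits() (defined elsewhere) is assumed to
// funnel into reached_limit() once either counter passes its limit,
// roughly:
//
//   if (_words_scanned >= _words_scanned_limit ||
//       _refs_reached  >= _refs_reached_limit) {
//     reached_limit();   // -> regular_clock_call()
//   }
//
// This is what makes regular_clock_call() below fire at roughly sub-ms
// intervals without ever reading the time on the fast path.
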
void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    }
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}
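
// A worked example of the limit arithmetic (the period value here is
// for illustration only): if words_scanned_period were 12k words and
// _words_scanned is W when the limits are recalculated, then
//
//   _real_words_scanned_limit = W + 12k
//   after decrease_limits():  _words_scanned_limit = W + 12k - 9k
//                                                  = W + 3k
//
// i.e. decrease_limits() below pulls the next clock call forward to
// roughly a quarter of the normal period, while leaving the "real"
// limit untouched for the next recalculation.
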
void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}
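
// Entries only ever move between a task's local queue and the global
// mark stack in chunks of global_stack_transfer_size, in both
// directions; the global stack is mutex-protected, so batching
// amortizes that cost. A rough picture:
//
//   local queue  --(buffer[global_stack_transfer_size])-->  global stack
//   local queue  <--(buffer[global_stack_transfer_size])--  global stack
//
// Both transfer routines end with decrease_limits() so that the
// regular clock fires soon after the expensive operation.
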
void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
                             _task_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
    }
  }
}
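
// The two drain targets, side by side (a sketch; the exact values come
// from the flags and the queue implementation):
//
//   partial drain of the local queue:
//     target = MIN2(max_elems / 3, GCDrainStackTargetSize)
//   partial drain of the global stack:
//     target = partial_mark_stack_size_target()
//   total drain (either):
//     target = 0
//
// Keeping the partial targets above zero is deliberate: it leaves
// entries behind for other tasks to steal or pop.
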
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
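
// drain_satb_buffers() above is, in outline, a three-phase protocol:
//
//   1. register CMObjectClosure with the SATB queue set;
//   2. repeatedly claim completed buffers and apply the closure,
//      calling the regular clock between buffers;
//   3. (remark only) also walk the partially-filled per-thread
//      buffers, then deregister the closure.
//
// The par/non-par duplication exists only because the queue set
// exposes parallel and serial entry points separately.
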
void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("  transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb,
                         _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms) method is the building block
    of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before it
    needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it needs to be also
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work will a task totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and more fine-grained
      interaction with it could cause contention. If it overflows,
      then the marking phase should restart and iterate over the
      bitmap to identify gray objects. Throughout the marking phase,
      tasks attempt to keep the global mark stack at a small length
      but not totally empty, so that entries are available for popping
      by other tasks. Only when there is no more work will tasks
      totally drain the global mark stack.

      (4) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
      (local queues, stacks, fingers etc.) are re-initialised so that
      when do_marking_step() completes, the marking phase can
      immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield, we abort and do the yield
      right at the end of do_marking_step(). This saves us a lot of
      hassle: by yielding we might allow a Full GC, and if that
      happens objects will be compacted underneath our feet, the heap
      might shrink, etc. We avoid having to check for all this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reaches a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.

 *****************************************************************************/
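
// A sketch of the calling convention the comment above describes (the
// driving code lives outside this file; local names here are
// illustrative):
//
//   CMTask* task = _cm->task(worker_id);
//   do {
//     task->do_marking_step(mark_step_duration_ms,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // ... yield / sleep here if the step aborted for a pause ...
//   } while (!_cm->has_aborted() && task->has_aborted());
//
// i.e. keep re-invoking the step until it finishes without having
// aborted, or until marking as a whole is aborted.
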
void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);
      }

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task. The
      // marking loop will restart it and we can hopefully find more
      // things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX             "###"

#define G1PPRL_ADDR_BASE_FORMAT        " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT             " %-4s"
#define G1PPRL_TYPE_H_FORMAT           " %4s"
#define G1PPRL_BYTE_FORMAT             " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT           " %9s"
#define G1PPRL_DOUBLE_FORMAT           " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT         " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
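
// For illustration, composing the per-region format macros via literal
// concatenation yields a single format string, roughly:
//
//   "###" " %-4s" " "PTR_FORMAT"-"PTR_FORMAT " "SIZE_FORMAT_W(9) ...
//
// so each print_cr() call below passes one value per macro, and the
// *_H_* variants line the header text up over the same columns.
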
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// It takes as a parameter a reference to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}
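
// A worked example of the attribution above (numbers are illustrative):
// with 1 MB regions, a 2.5 MB humongous object spans one "starts
// humongous" region and two "continues humongous" regions, and
// _hum_used_bytes is seeded with 2.5 MB. Successive get_hum_bytes()
// calls then hand out
//
//   HUMS  region: MIN2(1 MB, 2.5 MB) = 1 MB    (1.5 MB left)
//   HUMC1 region: MIN2(1 MB, 1.5 MB) = 1 MB    (0.5 MB left)
//   HUMC2 region: MIN2(1 MB, 0.5 MB) = 0.5 MB  (0 left)
//
// so the object's bytes are spread across its regions rather than all
// being charged to the first one.
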
bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}
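
// Typical usage of this closure (a sketch; the actual call sites live
// elsewhere in the G1 code): construct it, which prints the header,
// iterate it over every region, then let the destructor print the
// summary footer, e.g.
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//   // the footer is printed when cl goes out of scope
//
// doHeapRegion() returning false means "keep iterating", so the walk
// always visits every region.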