1 /* 2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/symbolTable.hpp" 27 #include "gc_implementation/g1/concurrentMark.inline.hpp" 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp" 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp" 32 #include "gc_implementation/g1/g1Log.hpp" 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp" 34 #include "gc_implementation/g1/g1RemSet.hpp" 35 #include "gc_implementation/g1/heapRegion.inline.hpp" 36 #include "gc_implementation/g1/heapRegionRemSet.hpp" 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp" 38 #include "gc_implementation/shared/vmGCOperations.hpp" 39 #include "memory/genOopClosures.inline.hpp" 40 #include "memory/referencePolicy.hpp" 41 #include "memory/resourceArea.hpp" 42 #include "oops/oop.inline.hpp" 43 #include "runtime/handles.inline.hpp" 44 #include "runtime/java.hpp" 45 46 // Concurrent marking bit map wrapper 47 48 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) : 49 _bm((uintptr_t*)NULL,0), 50 _shifter(shifter) { 51 _bmStartWord = (HeapWord*)(rs.base()); 52 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes 53 ReservedSpace brs(ReservedSpace::allocation_align_size_up( 54 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); 55 56 guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map"); 57 // For now we'll just commit all of the bit map up fromt. 58 // Later on we'll try to be more parsimonious with swap. 
59 guarantee(_virtual_space.initialize(brs, brs.size()), 60 "couldn't reseve backing store for concurrent marking bit map"); 61 assert(_virtual_space.committed_size() == brs.size(), 62 "didn't reserve backing store for all of concurrent marking bit map?"); 63 _bm.set_map((uintptr_t*)_virtual_space.low()); 64 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= 65 _bmWordSize, "inconsistency in bit map sizing"); 66 _bm.set_size(_bmWordSize >> _shifter); 67 } 68 69 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, 70 HeapWord* limit) const { 71 // First we must round addr *up* to a possible object boundary. 72 addr = (HeapWord*)align_size_up((intptr_t)addr, 73 HeapWordSize << _shifter); 74 size_t addrOffset = heapWordToOffset(addr); 75 if (limit == NULL) { 76 limit = _bmStartWord + _bmWordSize; 77 } 78 size_t limitOffset = heapWordToOffset(limit); 79 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 80 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 81 assert(nextAddr >= addr, "get_next_one postcondition"); 82 assert(nextAddr == limit || isMarked(nextAddr), 83 "get_next_one postcondition"); 84 return nextAddr; 85 } 86 87 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, 88 HeapWord* limit) const { 89 size_t addrOffset = heapWordToOffset(addr); 90 if (limit == NULL) { 91 limit = _bmStartWord + _bmWordSize; 92 } 93 size_t limitOffset = heapWordToOffset(limit); 94 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); 95 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 96 assert(nextAddr >= addr, "get_next_one postcondition"); 97 assert(nextAddr == limit || !isMarked(nextAddr), 98 "get_next_one postcondition"); 99 return nextAddr; 100 } 101 102 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { 103 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); 104 return (int) (diff >> _shifter); 105 } 106 107 #ifndef PRODUCT 108 bool CMBitMapRO::covers(ReservedSpace rs) 
const { 109 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 110 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize, 111 "size inconsistency"); 112 return _bmStartWord == (HeapWord*)(rs.base()) && 113 _bmWordSize == rs.size()>>LogHeapWordSize; 114 } 115 #endif 116 117 void CMBitMap::clearAll() { 118 _bm.clear(); 119 return; 120 } 121 122 void CMBitMap::markRange(MemRegion mr) { 123 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 124 assert(!mr.is_empty(), "unexpected empty region"); 125 assert((offsetToHeapWord(heapWordToOffset(mr.end())) == 126 ((HeapWord *) mr.end())), 127 "markRange memory region end is not card aligned"); 128 // convert address range into offset range 129 _bm.at_put_range(heapWordToOffset(mr.start()), 130 heapWordToOffset(mr.end()), true); 131 } 132 133 void CMBitMap::clearRange(MemRegion mr) { 134 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 135 assert(!mr.is_empty(), "unexpected empty region"); 136 // convert address range into offset range 137 _bm.at_put_range(heapWordToOffset(mr.start()), 138 heapWordToOffset(mr.end()), false); 139 } 140 141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, 142 HeapWord* end_addr) { 143 HeapWord* start = getNextMarkedWordAddress(addr); 144 start = MIN2(start, end_addr); 145 HeapWord* end = getNextUnmarkedWordAddress(start); 146 end = MIN2(end, end_addr); 147 assert(start <= end, "Consistency check"); 148 MemRegion mr(start, end); 149 if (!mr.is_empty()) { 150 clearRange(mr); 151 } 152 return mr; 153 } 154 155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) : 156 _base(NULL), _cm(cm) 157 #ifdef ASSERT 158 , _drain_in_progress(false) 159 , _drain_in_progress_yields(false) 160 #endif 161 {} 162 163 void CMMarkStack::allocate(size_t size) { 164 _base = NEW_C_HEAP_ARRAY(oop, size); 165 if (_base == NULL) { 166 vm_exit_during_initialization("Failed to allocate CM region mark stack"); 167 } 168 _index = 0; 169 _capacity = (jint) size; 170 _saved_index = 
-1; 171 NOT_PRODUCT(_max_depth = 0); 172 } 173 174 CMMarkStack::~CMMarkStack() { 175 if (_base != NULL) { 176 FREE_C_HEAP_ARRAY(oop, _base); 177 } 178 } 179 180 void CMMarkStack::par_push(oop ptr) { 181 while (true) { 182 if (isFull()) { 183 _overflow = true; 184 return; 185 } 186 // Otherwise... 187 jint index = _index; 188 jint next_index = index+1; 189 jint res = Atomic::cmpxchg(next_index, &_index, index); 190 if (res == index) { 191 _base[index] = ptr; 192 // Note that we don't maintain this atomically. We could, but it 193 // doesn't seem necessary. 194 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 195 return; 196 } 197 // Otherwise, we need to try again. 198 } 199 } 200 201 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { 202 while (true) { 203 if (isFull()) { 204 _overflow = true; 205 return; 206 } 207 // Otherwise... 208 jint index = _index; 209 jint next_index = index + n; 210 if (next_index > _capacity) { 211 _overflow = true; 212 return; 213 } 214 jint res = Atomic::cmpxchg(next_index, &_index, index); 215 if (res == index) { 216 for (int i = 0; i < n; i++) { 217 int ind = index + i; 218 assert(ind < _capacity, "By overflow test above."); 219 _base[ind] = ptr_arr[i]; 220 } 221 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 222 return; 223 } 224 // Otherwise, we need to try again. 225 } 226 } 227 228 229 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { 230 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 231 jint start = _index; 232 jint next_index = start + n; 233 if (next_index > _capacity) { 234 _overflow = true; 235 return; 236 } 237 // Otherwise. 
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


// Pop up to 'max' entries into 'ptr_arr' while holding
// ParGCRareEvent_lock; *n receives the count. Returns false iff the
// stack was empty.
bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

// Pop and iterate over every entry on the stack, applying 'cl' to
// each. If 'yield_after' is true, offer to yield after each object and
// return false if a yield actually happened (stack possibly non-empty).
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

// Record the stack depth at the start of a GC so note_end_of_gc() can
// verify nothing was pushed during the pause.
void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

// Apply 'f' to every entry currently on the stack (only valid during a
// GC pause, while the saved index matches).
void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

// True iff 'obj' has not (yet) been marked: either it looks dead to
// the heap, or it is in perm and unmarked in the next mark bitmap.
bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

// Set up the claiming state for a new root-region scan; a scan is "in
// progress" only if there is at least one survivor region.
void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

// Claim the next root region to scan, or NULL if none remain or the
// scan is being aborted. May be called by multiple workers; claiming
// is serialized under RootRegionScan_lock.
HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

// Mark the scan complete and wake up anyone blocked in
// wait_until_scan_finished().
void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

// Block (without safepoint checks) until the root-region scan is done.
// Returns false immediately if no scan was in progress.
bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

// Scale the number of marking threads off the number of parallel GC
// threads: roughly one quarter, but at least one.
uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

// Construct the global concurrent-marking state for a heap reserved in
// 'rs' with at most 'max_regions' regions. Sets up both marking
// bitmaps, the mark stack, per-task structures, liveness accounting
// and (when running with parallel GC threads) the marking work gang.
ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /*
           in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  // Clamp G1MarkingVerboseLevel into the legal [no_verbose, high_verbose]
  // range.
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  // Per-task arrays: task objects, accumulated vtimes, and the
  // per-task liveness accounting structures.
  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads =     0;
    _max_parallel_marking_threads = 0;
    _sleep_factor =                 0.0;
    _marking_task_overhead =        1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = (uint) ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (uint) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      // Default: scale the marking threads off ParallelGCThreads.
      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

// Track heap expansion while marking is in progress: move _heap_end up
// to the current committed end ('force' updates it even when marking
// is not in progress).
void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted.
  // So, let's do nothing.
}

// (Re-)initialize the global marking state at the start of a marking
// cycle. Must run in a STW phase.
void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // will be at the end of evacuation pauses, when tasks are
  // inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

// Set the number of active tasks and whether the upcoming phase runs
// concurrently (marking) or stop-the-world (remark). Resizes the
// terminator and both overflow barriers accordingly.
void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

// Clear the next marking bitmap in chunks, offering to yield between
// chunks; also clears the liveness counting data. Runs concurrently,
// while no marking cycle is in progress.
void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

// Records "start of marking" for every region that is not the
// continuation of a humongous object.
class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

// STW work done before the initial-mark root scan: reset the marking
// state and note start-of-marking on each region.
void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


// STW work done at the end of the initial-mark pause: start reference
// discovery, activate SATB queues and prepare the root-region scan.
void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that at the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, we're expected all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards.
 * If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible to be suspended for a Full GC or an evacuation pause
 * could occur. This is actually safe, since the entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

// First overflow barrier: all active tasks sync up here after an
// overflow; task 0 then resets the global marking state.
void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // then go into the second barrier
}

// Second overflow barrier: tasks sync up again after re-initializing
// their own data structures.
void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
// Debug-only support for forcing mark-stack overflows: should_force()
// answers true for the first G1ConcMarkForceOverflow calls after each
// update().
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

// Gang task that drives the concurrent marking steps on the worker
// threads; each worker repeatedly runs do_marking_step() and sleeps
// according to the configured sleep factor.
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        // If this task aborted (but marking overall has not), throttle
        // ourselves by sleeping proportionally to the time just spent.
        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

// Scan a single root region [bottom, top), applying the root-region
// scan closure to every object in it.
void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

// Gang task in which each worker claims and scans root regions until
// none remain.
class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (parallel_marking_threads() > 0) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier.
This is OK as it's 1064 // mainly used for sanity checking. 1065 root_regions()->scan_finished(); 1066 } 1067 } 1068 1069 void ConcurrentMark::markFromRoots() { 1070 // we might be tempted to assert that: 1071 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1072 // "inconsistent argument?"); 1073 // However that wouldn't be right, because it's possible that 1074 // a safepoint is indeed in progress as a younger generation 1075 // stop-the-world GC happens even as we mark in this generation. 1076 1077 _restart_for_overflow = false; 1078 force_overflow_conc()->init(); 1079 1080 // _g1h has _n_par_threads 1081 _parallel_marking_threads = calc_parallel_marking_threads(); 1082 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1083 "Maximum number of marking threads exceeded"); 1084 1085 uint active_workers = MAX2(1U, parallel_marking_threads()); 1086 1087 // Parallel task terminator is set in "set_phase()" 1088 set_phase(active_workers, true /* concurrent */); 1089 1090 CMConcurrentMarkingTask markingTask(this, cmThread()); 1091 if (parallel_marking_threads() > 0) { 1092 _parallel_workers->set_active_workers((int)active_workers); 1093 // Don't set _n_par_threads because it affects MT in proceess_strong_roots() 1094 // and the decisions on that MT processing is made elsewhere. 1095 assert(_parallel_workers->active_workers() > 0, "Should have been set"); 1096 _parallel_workers->run_task(&markingTask); 1097 } else { 1098 markingTask.work(0); 1099 } 1100 print_stats(); 1101 } 1102 1103 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1104 // world is stopped at this checkpoint 1105 assert(SafepointSynchronize::is_at_safepoint(), 1106 "world should be stopped"); 1107 1108 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1109 1110 // If a full collection has happened, we shouldn't do this. 
1111 if (has_aborted()) { 1112 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1113 return; 1114 } 1115 1116 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1117 1118 if (VerifyDuringGC) { 1119 HandleMark hm; // handle scope 1120 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1121 Universe::heap()->prepare_for_verify(); 1122 Universe::verify(/* silent */ false, 1123 /* option */ VerifyOption_G1UsePrevMarking); 1124 } 1125 1126 G1CollectorPolicy* g1p = g1h->g1_policy(); 1127 g1p->record_concurrent_mark_remark_start(); 1128 1129 double start = os::elapsedTime(); 1130 1131 checkpointRootsFinalWork(); 1132 1133 double mark_work_end = os::elapsedTime(); 1134 1135 weakRefsWork(clear_all_soft_refs); 1136 1137 if (has_overflown()) { 1138 // Oops. We overflowed. Restart concurrent marking. 1139 _restart_for_overflow = true; 1140 // Clear the flag. We do not need it any more. 1141 clear_has_overflown(); 1142 if (G1TraceMarkStackOverflow) { 1143 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1144 } 1145 } else { 1146 // Aggregate the per-task counting data that we have accumulated 1147 // while marking. 1148 aggregate_count_data(); 1149 1150 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1151 // We're done with marking. 1152 // This is the end of the marking cycle, we're expected all 1153 // threads to have SATB queues with active set to true. 
1154 satb_mq_set.set_active_all_threads(false, /* new active value */ 1155 true /* expected_active */); 1156 1157 if (VerifyDuringGC) { 1158 HandleMark hm; // handle scope 1159 gclog_or_tty->print(" VerifyDuringGC:(after)"); 1160 Universe::heap()->prepare_for_verify(); 1161 Universe::verify(/* silent */ false, 1162 /* option */ VerifyOption_G1UseNextMarking); 1163 } 1164 assert(!restart_for_overflow(), "sanity"); 1165 } 1166 1167 // Reset the marking state if marking completed 1168 if (!restart_for_overflow()) { 1169 set_non_marking_state(); 1170 } 1171 1172 #if VERIFY_OBJS_PROCESSED 1173 _scan_obj_cl.objs_processed = 0; 1174 ThreadLocalObjQueue::objs_enqueued = 0; 1175 #endif 1176 1177 // Statistics 1178 double now = os::elapsedTime(); 1179 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1180 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1181 _remark_times.add((now - start) * 1000.0); 1182 1183 g1p->record_concurrent_mark_remark_end(); 1184 } 1185 1186 // Base class of the closures that finalize and verify the 1187 // liveness counting data. 1188 class CMCountDataClosureBase: public HeapRegionClosure { 1189 protected: 1190 ConcurrentMark* _cm; 1191 BitMap* _region_bm; 1192 BitMap* _card_bm; 1193 1194 void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) { 1195 assert(start_idx <= last_idx, "sanity"); 1196 1197 // Set the inclusive bit range [start_idx, last_idx]. 1198 // For small ranges (up to 8 cards) use a simple loop; otherwise 1199 // use par_at_put_range. 1200 if ((last_idx - start_idx) < 8) { 1201 for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) { 1202 _card_bm->par_set_bit(i); 1203 } 1204 } else { 1205 assert(last_idx < _card_bm->size(), "sanity"); 1206 // Note BitMap::par_at_put_range() is exclusive. 
1207 _card_bm->par_at_put_range(start_idx, last_idx+1, true); 1208 } 1209 } 1210 1211 // It takes a region that's not empty (i.e., it has at least one 1212 // live object in it and sets its corresponding bit on the region 1213 // bitmap to 1. If the region is "starts humongous" it will also set 1214 // to 1 the bits on the region bitmap that correspond to its 1215 // associated "continues humongous" regions. 1216 void set_bit_for_region(HeapRegion* hr) { 1217 assert(!hr->continuesHumongous(), "should have filtered those out"); 1218 1219 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1220 if (!hr->startsHumongous()) { 1221 // Normal (non-humongous) case: just set the bit. 1222 _region_bm->par_at_put(index, true); 1223 } else { 1224 // Starts humongous case: calculate how many regions are part of 1225 // this humongous region and then set the bit range. 1226 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1227 HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1); 1228 BitMap::idx_t end_index = (BitMap::idx_t) last_hr->hrs_index() + 1; 1229 _region_bm->par_at_put_range(index, end_index, true); 1230 } 1231 } 1232 1233 public: 1234 CMCountDataClosureBase(ConcurrentMark *cm, 1235 BitMap* region_bm, BitMap* card_bm): 1236 _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { } 1237 }; 1238 1239 // Closure that calculates the # live objects per region. Used 1240 // for verification purposes during the cleanup pause. 
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;              // marking bitmap to walk
  size_t _region_marked_bytes;  // live bytes found in the last region visited

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(cm, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, nextTop, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);

    size_t marked_bytes = 0;

    while (start < nextTop) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_last = start + obj_sz - 1;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);

      // Set the bits in the card BM for this object (inclusive).
      set_card_bitmap_range(start_idx, last_idx);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (nextTop < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
      BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);

      set_card_bitmap_range(start_idx, last_idx);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _cm(cm),
    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Only take the lock when we are going to print, to serialize the
    // verbose output of concurrent verifier workers.
    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};


// Gang task that, in parallel, applies VerifyLiveObjectDataHRClosure
// to the heap regions and accumulates the total number of failures.
class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;
  bool _verbose;

public:
  // NOTE(review): the initializer list order below does not match the
  // declaration order (_n_workers is declared before _expected_region_bm
  // but initialized last). Members are initialized in declaration order
  // regardless, and there are no cross-dependencies here, so this is
  // harmless — but it will trigger -Wreorder on some compilers.
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_cm,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(ConcurrentMark* cm,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(cm, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Now set the bits for [ntams, top]
    // NOTE(review): this uses the card index of "top" itself (inclusive),
    // unlike CalcLiveObjectsClosure which uses top - 1; confirm that top
    // cannot land on a boundary where card_bitmap_index_for(top) refers to
    // the next region's first card.
    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
    BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
    set_card_bitmap_range(start_idx, last_idx);

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

// Gang task that finalizes the liveness counting data during the STW
// cleanup pause by applying FinalCountDataUpdateClosure to all regions.
class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_cm,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }
  }
};

class G1ParNoteEndTask;

// Per-worker closure used at the end of marking: notes the end of
// marking in each region, frees regions that turned out to be empty,
// and gathers per-worker timing / liveness statistics.
class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  uint _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  OldRegionSet* _old_proxy_set;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_proxy_set(old_proxy_set),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *hr) {
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    // NOTE(review): the comment above appears stale — no claim value is
    // passed in this method; verify it against the claim-value scheme
    // used by the enclosing G1ParNoteEndTask.
    hr->reset_gc_time_stamp();
    if (!hr->continuesHumongous()) {
      double start = os::elapsedTime();
      _regions_claimed++;
      hr->note_end_of_marking();
      _max_live_bytes += hr->max_live_bytes();
      _g1->free_region_if_empty(hr,
                                &_freed_bytes,
                                _local_cleanup_list,
                                _old_proxy_set,
                                _humongous_proxy_set,
                                _hrrs_cleanup_task,
                                true /* par */);
      double region_time = (os::elapsedTime() - start);
      _claimed_region_time += region_time;
      if (region_time > _max_region_time) {
        _max_region_time = region_time;
      }
    }
    return false;
  }

  size_t max_live_bytes() { return _max_live_bytes; }
  uint regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

// Gang task that runs G1NoteEndOfConcMarkClosure over the heap and then
// merges each worker's local free list and statistics into the global
// cleanup list under ParGCRareEvent_lock.
class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(uint worker_id) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
                                           &old_proxy_set,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
                                            _g1h->workers()->active_workers(),
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

// Gang task that scrubs the remembered sets using the region and card
// liveness bitmaps computed during counting.
class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm) { }

  void work(uint worker_id) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

// The STW cleanup pause: finalizes and (optionally) verifies the
// liveness counting data, notes the end of marking in every region,
// frees empty regions, and optionally scrubs the remembered sets.
void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  HRSPhaseSetter x(HRSPhaseCleanup);
  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  uint n_workers;

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    g1h->set_par_threads();
    n_workers = g1h->n_par_threads();
    assert(g1h->n_par_threads() == n_workers,
           "Should not have been reset");
    g1h->workers()->run_task(&g1_par_count_task);
    // Done with the parallel phase so reset to 0.
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    n_workers = 1;
    g1_par_count_task.work(0);
  }

  if (VerifyDuringGC) {
    // Verify that the counting data accumulated during marking matches
    // that calculated by walking the marking bitmap.

    // Bitmaps to hold expected values
    BitMap expected_region_bm(_region_bm.size(), false);
    BitMap expected_card_bm(_card_bm.size(), false);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
                                                 &_card_bm,
                                                 &expected_region_bm,
                                                 &expected_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_verify_task);
      // Done with the parallel phase so reset to 0.
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
             "sanity check");
    } else {
      g1_par_verify_task.work(0);
    }

    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  }

  size_t start_used_bytes = g1h->used();
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // NOTE(review): the comment below looks truncated by a lost line
  // ("call below" has no antecedent) — recover the full text from
  // version history.
  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  g1h->verify_region_sets_optional();
}

// Concurrent continuation of cleanup: clears the regions on the
// cleanup list and hands them to the secondary free list in batches.
void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Support closures for reference processing in G1

// An object is considered alive if its address is outside the G1
// reserved heap, or if it is inside and not "ill" (dead).
bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// Keep-alive closure used during reference processing: marks (and
// counts) each reachable-but-not-yet-marked object and pushes it on
// the mark stack for further scanning.
class G1CMKeepAliveClosure: public OopClosure {
  G1CollectedHeap* _g1;
  ConcurrentMark* _cm;
 public:
  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
    _g1(g1), _cm(cm) {
    assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    HeapWord* addr = (HeapWord*)obj;

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("\t[0] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             p, (void*) obj);
    }

    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
      _cm->mark_and_count(obj);
      _cm->mark_stack_push(obj);
    }
  }
};

class
G1CMDrainMarkingStackClosure: public VoidClosure { 2041 ConcurrentMark* _cm; 2042 CMMarkStack* _markStack; 2043 G1CMKeepAliveClosure* _oopClosure; 2044 public: 2045 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack, 2046 G1CMKeepAliveClosure* oopClosure) : 2047 _cm(cm), 2048 _markStack(markStack), 2049 _oopClosure(oopClosure) { } 2050 2051 void do_void() { 2052 _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false); 2053 } 2054 }; 2055 2056 // 'Keep Alive' closure used by parallel reference processing. 2057 // An instance of this closure is used in the parallel reference processing 2058 // code rather than an instance of G1CMKeepAliveClosure. We could have used 2059 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are 2060 // placed on to discovered ref lists once so we can mark and push with no 2061 // need to check whether the object has already been marked. Using the 2062 // G1CMKeepAliveClosure would mean, however, having all the worker threads 2063 // operating on the global mark stack. This means that an individual 2064 // worker would be doing lock-free pushes while it processes its own 2065 // discovered ref list followed by drain call. If the discovered ref lists 2066 // are unbalanced then this could cause interference with the other 2067 // workers. Using a CMTask (and its embedded local data structures) 2068 // avoids that potential interference. 
// Per-worker 'keep alive' closure for parallel reference processing:
// routes each discovered reference through the worker's own CMTask, and
// periodically drains the task's local structures (see class comment above).
class G1CMParKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark*  _cm;
  CMTask*          _task;
  int              _ref_counter_limit;   // drain every this-many references
  int              _ref_counter;         // countdown to the next drain
public:
  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->task_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them and objects
        // reachable from them on to the local stack (and possibly the global stack).
        // Call do_marking_step() to process these entries. We call the routine in a
        // loop, which we'll exit if there's nothing more to do (i.e. we're done
        // with the entries that we've pushed as a result of the deal_with_reference
        // calls above) or we overflow.
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
        // while there may still be some work to do. (See the comment at the
        // beginning of CMTask::do_marking_step() for those conditions - one of which
        // is reaching the specified time target.) It is only when
        // CMTask::do_marking_step() returns without setting the has_aborted() flag
        // that the marking has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_stealing    */,
                                 false /* do_termination */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
      }
    }
  }
};

// Per-worker 'drain' closure for parallel reference processing:
// completely drains the worker's local and the global marking stacks.
class G1CMParDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask*         _task;
public:
  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task) { }

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
                               _task->task_id());
      }

      // We call CMTask::do_marking_step() to completely drain the local and
      // global marking stacks. The routine is called in a loop, which we'll
      // exit if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the
      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
      // lists above) or we overflow the global marking stack.
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
      // while there may still be some work to do. (See the comment at the
      // beginning of CMTask::do_marking_step() for those conditions - one of which
      // is reaching the specified time target.) It is only when
      // CMTask::do_marking_step() returns without setting the has_aborted() flag
      // that the marking has completed.
      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_stealing    */,
                             true /* do_termination */);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

// Gang-task wrapper that runs a reference-processing ProcessTask on each
// worker, wiring in that worker's CMTask via the par closures above.
class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) { }

  virtual void work(uint worker_id) {
    CMTask* marking_task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the phase for each task execution so that
  // the termination protocol of CMTask::do_marking_step works.
  _cm->set_phase(_active_workers, false /* concurrent */);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

// Gang-task wrapper that runs a reference-enqueueing EnqueueTask on each
// worker; no marking state is needed for enqueueing.
class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

// Processes (and then enqueues) the references discovered during
// concurrent marking, serially or in parallel, and finally cleans the
// string and symbol tables.
void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    if (G1Log::finer()) {
      gclog_or_tty->put(' ');
    }
    TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Process weak references.
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    G1CMKeepAliveClosure g1_keep_alive(g1h, this);
    G1CMDrainMarkingStackClosure
      g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);

    // We use the work gang from the G1CollectedHeap and we utilize all
    // the worker threads.
    uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
    active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);

    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);

    if (rp->processing_is_mt()) {
      // Set the degree of MT here. If the discovery is done MT, there
      // may have been a different number of threads doing the discovery
      // and a different number of discovered lists may have Ref objects.
      // That is OK as long as the Reference lists are balanced (see
      // balance_all_queues() and balance_queues()).
      rp->set_active_mt_degree(active_workers);

      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        &par_task_executor);

      // The work routines of the parallel keep_alive and drain_marking_stack
      // will set the has_overflown flag if we overflow the global marking
      // stack.
    } else {
      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        NULL);
    }

    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");
    if (_markStack.overflow()) {
      // Should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    if (rp->processing_is_mt()) {
      assert(rp->num_q() == active_workers, "why not");
      rp->enqueue_discovered_references(&par_task_executor);
    } else {
      rp->enqueue_discovered_references();
    }

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}

// Exchanges the prev and next marking bitmaps; called after marking
// completes so the just-built bitmap becomes the "prev" one.
void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

// Gang task for the remark pause: each worker runs its CMTask's
// do_marking_step() to completion (with stealing and termination).
class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark *_cm;

public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be activated.
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing    */,
                              true /* do_termination */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

// The work of the remark pause: runs CMRemarkTask (parallel or serial)
// and checks that all SATB buffers have been fully processed.
void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers);
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    uint active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

// Debug-only closure: prints each oop field of an object, annotated
// with its position relative to TAMS and its marked state.
class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption  vo,
                           bool          all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";   // outside the G1 reserved heap
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = false;
      bool marked = false;

      switch (_vo) {
        case VerifyOption_G1UsePrevMarking:
          over_tams = hr->obj_allocated_since_prev_marking(obj);
          marked = _g1h->isMarkedPrev(obj);
          break;
        case VerifyOption_G1UseNextMarking:
          over_tams = hr->obj_allocated_since_next_marking(obj);
          marked = _g1h->isMarkedNext(obj);
          break;
        case VerifyOption_G1UseMarkWord:
          marked = obj->is_gc_marked();
          break;
        default:
          ShouldNotReachHere();
      }

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

// Debug-only closure: prints an object (and, via PrintReachableOopClosure,
// its oop fields) if it is above TAMS, marked, or _all is set.
class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;
  HeapRegion*      _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all,
                              HeapRegion*   hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = false;
    bool marked = false;

    switch (_vo) {
      case VerifyOption_G1UsePrevMarking:
        over_tams = _hr->obj_allocated_since_prev_marking(o);
        marked = _g1h->isMarkedPrev(o);
        break;
      case VerifyOption_G1UseNextMarking:
        over_tams = _hr->obj_allocated_since_next_marking(o);
        marked = _g1h->isMarkedNext(o);
        break;
      case VerifyOption_G1UseMarkWord:
        marked = o->is_gc_marked();
        break;
      default:
        ShouldNotReachHere();
    }
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ? " M" : "");
      PrintReachableOopClosure oopCl(_out, _vo, _all);
      o->oop_iterate(&oopCl);
    }
  }
};

// Debug-only closure: prints a heap region's bounds and TAMS, then
// iterates the objects in [bottom, top) with PrintReachableObjectClosure.
class PrintReachableRegionClosure : public HeapRegionClosure {
private:
  outputStream* _out;
  VerifyOption  _vo;
  bool          _all;

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    HeapWord* p = NULL;

    switch (_vo) {
      case VerifyOption_G1UsePrevMarking:
        p = hr->prev_top_at_mark_start();
        break;
      case VerifyOption_G1UseNextMarking:
        p = hr->next_top_at_mark_start();
        break;
      case VerifyOption_G1UseMarkWord:
        // When we are verifying marking using the mark word
        // TAMS has no relevance.
        assert(p == NULL, "post-condition");
        break;
      default:
        ShouldNotReachHere();
    }
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "TAMS: "PTR_FORMAT, b, e, t, p);
    _out->cr();

    HeapWord* from = b;
    HeapWord* to   = t;

    if (to > from) {
      _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
      _out->cr();
      PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
      hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
      _out->cr();
    }

    return false;   // continue iterating over all regions
  }

  PrintReachableRegionClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all) :
    _out(out), _vo(vo), _all(all) { }
};

// Maps a VerifyOption to the TAMS name used in the dump header.
static const char* verify_option_to_tams(VerifyOption vo) {
  switch (vo) {
    case VerifyOption_G1UsePrevMarking:
      return "PTAMS";
    case VerifyOption_G1UseNextMarking:
      return "NTAMS";
    default:
      return "NONE";
  }
}

// Debug-only heap dump: writes every region's reachable objects to the
// file "<G1PrintReachableBaseFile>.<str>". Bails out (with a message on
// the GC log) if no base file is set or the name is too long.
void ConcurrentMark::print_reachable(const char* str,
                                     VerifyOption vo,
                                     bool all) {
  gclog_or_tty->cr();
  gclog_or_tty->print_cr("== Doing heap dump... ");

  if (G1PrintReachableBaseFile == NULL) {
    gclog_or_tty->print_cr(" #### error: no base file defined");
    return;
  }

  if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
      (JVM_MAXPATHLEN - 1)) {
    gclog_or_tty->print_cr(" #### error: file name too long");
    return;
  }

  char file_name[JVM_MAXPATHLEN];
  sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  gclog_or_tty->print_cr(" dumping to file %s", file_name);

  fileStream fout(file_name);
  if (!fout.is_open()) {
    gclog_or_tty->print_cr(" #### error: could not open file");
    return;
  }

  outputStream* out = &fout;
  out->print_cr("-- USING %s", verify_option_to_tams(vo));
  out->cr();

  out->print_cr("--- ITERATING OVER REGIONS");
  out->cr();
  PrintReachableRegionClosure rcl(out, vo, all);
  _g1h->heap_region_iterate(&rcl);
  out->cr();

  gclog_or_tty->print_cr(" done");
  gclog_or_tty->flush();
}

#endif // PRODUCT

void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
}

void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  _nextMarkBitMap->clearRange(mr);
}

void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
  clearRangePrevBitmap(mr);
  clearRangeNextBitmap(mr);
}

// Claims the next region to scan for the given task by CAS-advancing the
// global finger past it. Returns the region if it is non-empty, or NULL
// if the claimed region was empty or the CAS lost (caller retries).
HeapRegion*
ConcurrentMark::claim_region(int task_num) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // Note on how this code handles humongous regions. In the
    // normal case the finger will reach the start of a "starts
    // humongous" (SH) region. Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the progress of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord*   bottom      = curr_region->bottom();
    HeapWord*   end         = curr_region->end();
    HeapWord*   limit       = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    // Attempt to claim [bottom, end) by advancing the global finger to end.
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      // The CAS lost: some other task claimed a region first.
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               task_num, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
// Identifies which data structure is being scanned when a
// collection-set oop is found, for the failure message below.
enum VerifyNoCSetOopsPhase {
  VerifyNoCSetOopsStack,
  VerifyNoCSetOopsQueues,
  VerifyNoCSetOopsSATBCompleted,
  VerifyNoCSetOopsSATBThread
};

// Debug-only closure: guarantees that no oop it visits is in the
// collection set, reporting the phase (and optional index) on failure.
class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  VerifyNoCSetOopsPhase _phase;
  int _info;   // e.g. queue index; -1 when not applicable

  const char* phase_str() {
    switch (_phase) {
      case VerifyNoCSetOopsStack:         return "Stack";
      case VerifyNoCSetOopsQueues:        return "Queue";
      case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
      case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
      default:                            ShouldNotReachHere();
    }
    return NULL;
  }

  void do_object_work(oop obj) {
    guarantee(!_g1h->obj_in_cs(obj),
              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
                      (void*) obj, phase_str(), _info));
  }

public:
  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }

  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
    _phase = phase;
    _info = info;
  }

  virtual void do_oop(oop* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    do_object_work(obj);
  }

  virtual void do_oop(narrowOop* p) {
    // We should not come across narrow oops while scanning marking
    // stacks and SATB buffers.
    ShouldNotReachHere();
  }

  virtual void do_object(oop obj) {
    do_object_work(obj);
  }
};

// Debug-only verification (at a safepoint, while marking is in progress)
// that none of the marking data structures — global mark stack, task
// queues, SATB buffers, and the global/task fingers — refer into the
// collection set.
void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
                                         bool verify_enqueued_buffers,
                                         bool verify_thread_buffers,
                                         bool verify_fingers) {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->mark_in_progress()) {
    return;
  }

  VerifyNoCSetOopsClosure cl;

  if (verify_stacks) {
    // Verify entries on the global mark stack
    cl.set_phase(VerifyNoCSetOopsStack);
    _markStack.oops_do(&cl);

    // Verify entries on the task queues
    for (int i = 0; i < (int) _max_task_num; i += 1) {
      cl.set_phase(VerifyNoCSetOopsQueues, i);
      OopTaskQueue* queue = _task_queues->queue(i);
      queue->oops_do(&cl);
    }
  }

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

  // Verify entries on the enqueued SATB buffers
  if (verify_enqueued_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
    satb_qs.iterate_completed_buffers_read_only(&cl);
  }

  // Verify entries on the per-thread SATB buffers
  if (verify_thread_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBThread);
    satb_qs.iterate_thread_buffers_read_only(&cl);
  }

  if (verify_fingers) {
    // Verify the global finger
    HeapWord* global_finger = finger();
    if (global_finger != NULL && global_finger < _heap_end) {
      // The global finger always points to a heap region boundary. We
      // use heap_region_containing_raw() to get the containing region
      // given that the global finger could be pointing to a free region
      // which subsequently becomes continues humongous. If that
      // happens, heap_region_containing() will return the bottom of the
      // corresponding starts humongous region and the check below will
      // not hold any more.
      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
      guarantee(global_finger == global_hr->bottom(),
                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
                        global_finger, HR_FORMAT_PARAMS(global_hr)));
    }

    // Verify the task fingers
    assert(parallel_marking_threads() <= _max_task_num, "sanity");
    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
      CMTask* task = _tasks[i];
      HeapWord* task_finger = task->finger();
      if (task_finger != NULL && task_finger < _heap_end) {
        // See above note on the global finger verification.
        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
        guarantee(task_finger == task_hr->bottom() ||
                  !task_hr->in_collection_set(),
                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
                          task_finger, HR_FORMAT_PARAMS(task_hr)));
      }
    }
  }
}
#endif // PRODUCT

// Resets the global marking state: empties the global mark stack and all
// task queues, rewinds the finger, and (optionally) clears the overflow
// flag. When clear_overflow is false the overflow flag must already be
// set — we are restarting marking after an overflow.
void ConcurrentMark::clear_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

// Aggregate the counting data that was constructed concurrently
// with marking.
// Heap region closure that folds the per-task liveness counting data
// (marked-bytes totals and per-task card bitmaps) for one region into
// the global totals: the region's marked-bytes counter and the global
// card bitmap.
class AggregateCountDataHRClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  BitMap* _cm_card_bm;     // global card bitmap that the per-task bitmaps are unioned into
  size_t _max_task_num;    // number of per-task counting data sets to aggregate

 public:
  AggregateCountDataHRClosure(ConcurrentMark *cm,
                              BitMap* cm_card_bm,
                              size_t max_task_num) :
    _cm(cm), _cm_card_bm(cm_card_bm),
    _max_task_num(max_task_num) { }

  // Returns true iff p lies exactly on a card boundary.
  bool is_card_aligned(HeapWord* p) {
    return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have their bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, limit, hr->top(), hr->end()));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    assert(is_card_aligned(start), "sanity");
    assert(is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the index for
    // limit so that we get the card spanning ntams.
    if (!is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrs_index = hr->hrs_index();
    size_t marked_bytes = 0;

    for (int i = 0; (size_t)i < _max_task_num; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrs_index];

      // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
    hr->add_to_marked_bytes(marked_bytes);

    // Next heap region
    return false;
  }
};

// Gang task that runs AggregateCountDataHRClosure over the whole heap,
// chunking the region iteration across the active workers when running
// with parallel GC threads, or iterating serially otherwise.
class G1AggregateCountDataTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _cm_card_bm;
  size_t _max_task_num;
  int _active_workers;

public:
  G1AggregateCountDataTask(G1CollectedHeap* g1h,
                           ConcurrentMark* cm,
                           BitMap* cm_card_bm,
                           size_t max_task_num,
                           int n_workers) :
    AbstractGangTask("Count Aggregation"),
    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
    _max_task_num(max_task_num),
    _active_workers(n_workers) { }

  void work(uint worker_id) {
    AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
                                            _active_workers,
                                            HeapRegion::AggregateCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&cl);
    }
  }
};

// Aggregates the per-task counting data collected concurrently with
// marking into the global liveness counts, using the worker gang when
// parallel GC threads are in use. The claim-value asserts bracket the
// parallel iteration to check the region-claiming protocol.
void ConcurrentMark::aggregate_count_data() {
  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                        _g1h->workers()->active_workers() :
                        1);

  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                           _max_task_num, n_workers);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");
    _g1h->set_par_threads(n_workers);
    _g1h->workers()->run_task(&g1_par_agg_task);
    _g1h->set_par_threads(0);

    assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
           "sanity check");
    _g1h->reset_heap_region_claim_values();
  } else {
    g1_par_agg_task.work(0);
  }
}

// Clear the per-worker arrays used to store the per-region counting data
void ConcurrentMark::clear_all_count_data() {
  // Clear the global card bitmap - it will be filled during
  // liveness count aggregation (during remark) and the
  // final counting task.
  _card_bm.clear();

  // Clear the global region bitmap - it will be filled as part
  // of the final counting task.
  _region_bm.clear();

  uint max_regions = _g1h->max_regions();
  assert(_max_task_num != 0, "unitialized");

  // Zero each task's marked-bytes array and card bitmap.
  for (int i = 0; (size_t) i < _max_task_num; i += 1) {
    BitMap* task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_array_for(i);

    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
    assert(marked_bytes_array != NULL, "uninitialized");

    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
    task_card_bm->clear();
  }
}

// Prints the per-task marking statistics when verbose stats are enabled.
void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Clear the liveness counting data
  clear_all_count_data();
  // Empty mark stack
  clear_marking_state();
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}

// Prints one line of summary timing for a marking phase: count, total
// and average, plus standard deviation and maximum when there is data.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

// Prints the cumulative concurrent-marking timing summary: init marks,
// remarks (with the final-mark and weak-ref sub-phases), cleanups,
// counting and remembered-set scrub totals, and the overall
// stop-the-world versus concurrent time split.
void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
                         "(%8.2f s marking).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

// We take a break if someone is trying to stop the world.
// Yields to a pending safepoint if requested. Worker 0 brackets the
// yield with the policy's concurrent-pause recording. Returns true if
// we actually yielded.
bool ConcurrentMark::do_yield_check(uint worker_id) {
  if (should_yield()) {
    if (worker_id == 0) {
      _g1h->g1_policy()->record_concurrent_pause();
    }
    cmThread()->yield();
    if (worker_id == 0) {
      _g1h->g1_policy()->record_concurrent_pause_end();
    }
    return true;
  } else {
    return false;
  }
}

bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}

// Returns whether the card covering address p is set in the global
// card bitmap.
bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}

// Returns whether the cards covering both the first and last address
// of a range are marked.
bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return containing_card_is_marked(start) &&
         containing_card_is_marked(last);
}

#ifndef PRODUCT
// for debugging purposes
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (int i = 0; i < (int) _max_task_num; ++i) {
    gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
#endif

// Iterates over the fields of a single already-marked object, updates
// the words-scanned statistics, and then checks the work-based clock
// limits (which may set the abort flag via reached_limit()).
void CMTask::scan_object(oop obj) {
  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");

  if (_cm->verbose_high()) {
    gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
                           _task_id, (void*) obj);
  }

  size_t obj_size = obj->size();
  _words_scanned += obj_size;

  obj->oop_iterate(_cm_oop_closure);
  statsOnly( ++_objs_scanned );
  check_limits();
}

// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap*                   _nextMarkBitMap;
  ConcurrentMark*             _cm;
  CMTask*                     _task;

public:
  // NOTE(review): initializer-list order differs from declaration
  // order; members are initialized in declaration order, which is
  // harmless here as the initializers are independent.
  CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  // Invoked for each set bit (i.e. each marked object below the
  // task's region limit); scans the object and partially drains the
  // local queue and global stack. Returning false terminates the
  // bitmap iteration when the task has aborted.
  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");

    statsOnly( _task->increase_objs_found_on_bitmap() );
    assert(addr >= _task->finger(), "invariant");

    // We move that task's local finger along.
    _task->move_finger_to(addr);

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

// When G1UseConcMarkReferenceProcessing is enabled, the closure
// carries the concurrent-marking reference processor so discovered
// references are filtered during field iteration.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

// Makes hr the task's current region: sets the local finger to the
// region's bottom and computes the region limit.
void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
        "claim_region() should have filtered out continues humongous regions");
  assert(!hr->continuesHumongous(),
        "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
                           _task_id, hr);
  }

  _curr_region  = hr;
  _finger       = hr->bottom();
  update_region_limit();
}

// Re-reads the current region's NTAMS and updates _region_limit (and,
// in the corner cases described below, the local finger) accordingly.
void CMTask::update_region_limit() {
  HeapRegion* hr            = _curr_region;
  HeapWord* bottom          = hr->bottom();
  HeapWord* limit           = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

// Drops the task's claim on its current region (e.g. on abort).
void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  }
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region  = NULL;
  _finger       = NULL;
  _region_limit = NULL;
}

// Installs (non-NULL) or removes (NULL) the oop closure used while
// scanning objects; the asserts enforce strict alternation.
void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

// Re-initializes the task for a new marking cycle against the given
// next marking bitmap, clearing region state, timing fields and (when
// built with marking stats) all statistics counters.
void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] resetting", _task_id);
  }

  _nextMarkBitMap                = nextMarkBitMap;
  clear_region_fields();

  _calls                         = 0;
  _elapsed_time_ms               = 0.0;
  _termination_time_ms           = 0.0;
  _termination_start_time_ms     = 0.0;

#if _MARKING_STATS_
  _local_pushes                  = 0;
  _local_pops                    = 0;
  _local_max_size                = 0;
  _objs_scanned                  = 0;
  _global_pushes                 = 0;
  _global_pops                   = 0;
  _global_max_size               = 0;
  _global_transfers_to           = 0;
  _global_transfers_from         = 0;
  _regions_claimed               = 0;
  _objs_found_on_bitmap          = 0;
  _satb_buffers_processed        = 0;
  _steal_attempts                = 0;
  _steals                        = 0;
  _aborted                       = 0;
  _aborted_overflow              = 0;
  _aborted_cm_aborted            = 0;
  _aborted_yield                 = 0;
  _aborted_timed_out             = 0;
  _aborted_satb                  = 0;
  _aborted_termination           = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

// Called when one of the work-based limits has been hit; just runs
// the regular clock so the abort conditions get checked.
void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

// The work-based "clock": invoked every time enough words have been
// scanned or refs reached. Checks, in order, all the conditions that
// should make the task abort (overflow, full-GC abort, yield request,
// time quota, pending SATB buffers) and raises the abort flag when
// one of them holds.
void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    }
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

// Sets the next words-scanned / refs-reached thresholds one full
// period beyond the current counters.
void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

// Pops up to global_stack_transfer_size entries from the local queue
// and pushes them onto the global mark stack in one batch; aborts the
// task on global stack overflow.
void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

// Pops a batch of entries from the global mark stack and pushes them
// onto the (near-empty) local queue.
void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

// Pops and scans objects from the local queue until it shrinks to the
// target size (a fraction of capacity when draining partially, zero
// when draining totally) or the task aborts.
void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      // NOTE(review): target_size is a size_t printed with %d here -
      // verify the format on LP64 builds.
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
    }
  }
}

// Repeatedly transfers batches from the global mark stack to the
// local queue and drains them, until the global stack shrinks to the
// target size or the task aborts.
void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
                             _task_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
    }
  }
}

// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. this is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

// Prints this task's cumulative marking statistics; the detailed
// counters are only available when built with _MARKING_STATS_.
void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to,_global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms) method is the building block
    of the parallel marking framework.
It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

    (1) Marking Bitmap. If there are gray objects that appear only
    on the bitmap (this happens either when dealing with an overflow
    or when the initial marking phase has simply marked the roots
    and didn't push them on the stack), then tasks claim heap
    regions whose bitmap they then scan to find gray objects. A
    global finger indicates where the end of the last claimed region
    is. A local finger indicates how far into the region a task has
    scanned. The two fingers are used to determine how to gray an
    object (i.e. whether simply marking it is OK, as it will be
    visited by a task in the future, or whether it needs to be also
    pushed on a stack).

    (2) Local Queue. The local queue of the task which is accessed
    reasonably efficiently by the task. Other tasks can steal from
    it when they run out of work. Throughout the marking phase, a
    task attempts to keep its local queue short but not totally
    empty, so that entries are available for stealing by other
    tasks. Only when there is no more work, a task will totally
    drain its local queue.

    (3) Global Mark Stack. This handles local queue overflow.
During 3824 marking only sets of entries are moved between it and the local 3825 queues, as access to it requires a mutex and more fine-grain 3826 interaction with it which might cause contention. If it 3827 overflows, then the marking phase should restart and iterate 3828 over the bitmap to identify gray objects. Throughout the marking 3829 phase, tasks attempt to keep the global mark stack at a small 3830 length but not totally empty, so that entries are available for 3831 popping by other tasks. Only when there is no more work, tasks 3832 will totally drain the global mark stack. 3833 3834 (4) SATB Buffer Queue. This is where completed SATB buffers are 3835 made available. Buffers are regularly removed from this queue 3836 and scanned for roots, so that the queue doesn't get too 3837 long. During remark, all completed buffers are processed, as 3838 well as the filled in parts of any uncompleted buffers. 3839 3840 The do_marking_step() method tries to abort when the time target 3841 has been reached. There are a few other cases when the 3842 do_marking_step() method also aborts: 3843 3844 (1) When the marking phase has been aborted (after a Full GC). 3845 3846 (2) When a global overflow (on the global stack) has been 3847 triggered. Before the task aborts, it will actually sync up with 3848 the other tasks to ensure that all the marking data structures 3849 (local queues, stacks, fingers etc.) are re-initialised so that 3850 when do_marking_step() completes, the marking phase can 3851 immediately restart. 3852 3853 (3) When enough completed SATB buffers are available. The 3854 do_marking_step() method only tries to drain SATB buffers right 3855 at the beginning. So, if enough buffers are available, the 3856 marking step aborts and the SATB buffers are processed at 3857 the beginning of the next invocation. 3858 3859 (4) To yield. when we have to yield then we abort and yield 3860 right at the end of do_marking_step(). 
This saves us from a lot 3861 of hassle as, by yielding we might allow a Full GC. If this 3862 happens then objects will be compacted underneath our feet, the 3863 heap might shrink, etc. We save checking for this by just 3864 aborting and doing the yield right at the end. 3865 3866 From the above it follows that the do_marking_step() method should 3867 be called in a loop (or, otherwise, regularly) until it completes. 3868 3869 If a marking step completes without its has_aborted() flag being 3870 true, it means it has completed the current marking phase (and 3871 also all other marking tasks have done so and have all synced up). 3872 3873 A method called regular_clock_call() is invoked "regularly" (in 3874 sub ms intervals) throughout marking. It is this clock method that 3875 checks all the abort conditions which were mentioned above and 3876 decides when the task should abort. A work-based scheme is used to 3877 trigger this clock method: when the number of object words the 3878 marking phase has scanned or the number of references the marking 3879 phase has visited reach a given limit. Additional invocations to 3880 the method clock have been planted in a few other strategic places 3881 too. The initial reason for the clock method was to avoid calling 3882 vtime too regularly, as it is quite expensive. So, once it was in 3883 place, it was natural to piggy-back all the other conditions on it 3884 too and not constantly check them throughout the code. 
*****************************************************************************/

// Performs one bounded unit of marking work for this task. See the large
// comment above for the full protocol; callers are expected to invoke this
// repeatedly (in a loop) until it completes with has_aborted() == false.
//
// time_target_ms - soft time budget for this step (>= 1ms); the regular
//                  clock aborts the step when the (prediction-adjusted)
//                  target is reached.
// do_stealing    - if true, attempt to steal entries from other tasks'
//                  queues once local work is exhausted.
// do_termination - if true, run the forced-overflow check and enter the
//                  termination protocol at the end of the step.
void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // Shrink the time target by the historical difference between how
  // long a step was asked to take and how long it actually took.
  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: scan the current region's bitmap (if any), then
  // claim new regions until we either run out of regions or abort.
  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);
      }

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task. It
      // will restart it and we can hopefully find more things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

// Constructs a marking task. Only the identifiers, the queues and the
// per-task accounting arrays are recorded here; _nextMarkBitMap is
// initialised to NULL and is presumably set up elsewhere before the task
// runs — confirm against the ConcurrentMark initialisation code.
CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  // Seed the step-diff predictor with a non-zero sample so the first
  // call to get_new_prediction() has data to work with — NOTE(review):
  // the choice of 0.5ms looks like a heuristic seed; confirm intent.
  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX           "###"

#define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT           " %-4s"
#define G1PPRL_TYPE_H_FORMAT         " %4s"
#define G1PPRL_BYTE_FORMAT           " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT         " %9s"
#define G1PPRL_DOUBLE_FORMAT         " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT       " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

// Prints the per-region liveness header (phase name, heap extents,
// column titles). The matching per-region lines are emitted by
// doHeapRegion() and the totals by the destructor.
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// It takes as a parameter a reference to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly.
It assumes 4387 // that that _hum_* fields have already been set up from the "starts 4388 // humongous" region and we visit the regions in address order. 4389 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4390 size_t* capacity_bytes, 4391 size_t* prev_live_bytes, 4392 size_t* next_live_bytes) { 4393 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4394 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4395 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4396 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4397 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4398 } 4399 4400 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4401 const char* type = ""; 4402 HeapWord* bottom = r->bottom(); 4403 HeapWord* end = r->end(); 4404 size_t capacity_bytes = r->capacity(); 4405 size_t used_bytes = r->used(); 4406 size_t prev_live_bytes = r->live_bytes(); 4407 size_t next_live_bytes = r->next_live_bytes(); 4408 double gc_eff = r->gc_efficiency(); 4409 if (r->used() == 0) { 4410 type = "FREE"; 4411 } else if (r->is_survivor()) { 4412 type = "SURV"; 4413 } else if (r->is_young()) { 4414 type = "EDEN"; 4415 } else if (r->startsHumongous()) { 4416 type = "HUMS"; 4417 4418 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4419 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4420 "they should have been zeroed after the last time we used them"); 4421 // Set up the _hum_* fields. 
4422 _hum_capacity_bytes = capacity_bytes; 4423 _hum_used_bytes = used_bytes; 4424 _hum_prev_live_bytes = prev_live_bytes; 4425 _hum_next_live_bytes = next_live_bytes; 4426 get_hum_bytes(&used_bytes, &capacity_bytes, 4427 &prev_live_bytes, &next_live_bytes); 4428 end = bottom + HeapRegion::GrainWords; 4429 } else if (r->continuesHumongous()) { 4430 type = "HUMC"; 4431 get_hum_bytes(&used_bytes, &capacity_bytes, 4432 &prev_live_bytes, &next_live_bytes); 4433 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4434 } else { 4435 type = "OLD"; 4436 } 4437 4438 _total_used_bytes += used_bytes; 4439 _total_capacity_bytes += capacity_bytes; 4440 _total_prev_live_bytes += prev_live_bytes; 4441 _total_next_live_bytes += next_live_bytes; 4442 4443 // Print a line for this particular region. 4444 _out->print_cr(G1PPRL_LINE_PREFIX 4445 G1PPRL_TYPE_FORMAT 4446 G1PPRL_ADDR_BASE_FORMAT 4447 G1PPRL_BYTE_FORMAT 4448 G1PPRL_BYTE_FORMAT 4449 G1PPRL_BYTE_FORMAT 4450 G1PPRL_DOUBLE_FORMAT, 4451 type, bottom, end, 4452 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4453 4454 return false; 4455 } 4456 4457 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4458 // Print the footer of the output. 4459 _out->print_cr(G1PPRL_LINE_PREFIX); 4460 _out->print_cr(G1PPRL_LINE_PREFIX 4461 " SUMMARY" 4462 G1PPRL_SUM_MB_FORMAT("capacity") 4463 G1PPRL_SUM_MB_PERC_FORMAT("used") 4464 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4465 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4466 bytes_to_mb(_total_capacity_bytes), 4467 bytes_to_mb(_total_used_bytes), 4468 perc(_total_used_bytes, _total_capacity_bytes), 4469 bytes_to_mb(_total_prev_live_bytes), 4470 perc(_total_prev_live_bytes, _total_capacity_bytes), 4471 bytes_to_mb(_total_next_live_bytes), 4472 perc(_total_next_live_bytes, _total_capacity_bytes)); 4473 _out->cr(); 4474 }