/*
 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
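
// Illustrative sketch (not part of the VM sources): how the _shifter maps
// heap-word addresses to bit offsets and back, mirroring what
// heapWordToOffset() / offsetToHeapWord() do. The concrete numbers assume
// 8-byte heap words and a shifter of 0 (one bit per heap word), which is
// what MinObjAlignment - 1 yields with the default object alignment; the
// names and values here are purely for illustration.
#if 0
static void shifter_arithmetic_sketch() {
  const uintptr_t bm_start = 0x100000000;   // assumed start of covered range
  const int       shifter  = 0;             // one bit per heap word

  // heapWordToOffset(addr): word distance from the start, scaled by shifter.
  uintptr_t addr   = bm_start + 42 * HeapWordSize;          // the 42nd word
  size_t    offset = ((addr - bm_start) / HeapWordSize) >> shifter;
  assert(offset == 42, "one bit per heap word at shifter 0");

  // offsetToHeapWord(offset) is the inverse mapping.
  uintptr_t back = bm_start + (offset << shifter) * HeapWordSize;
  assert(back == addr, "round trip");
}
#endif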
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
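
// Illustrative sketch (not part of the VM sources): what a caller of the
// lock-free par_push() above is expected to do. The CAS on _index claims a
// slot; on a full stack par_push() only raises _overflow, so the caller has
// to poll the flag (the overflow() accessor is assumed from the class
// declaration) and trigger the restart-for-overflow protocol itself --
// handle_overflow() below is a hypothetical recovery hook.
#if 0
static void par_push_usage_sketch(CMMarkStack* stack, oop obj) {
  stack->par_push(obj);
  if (stack->overflow()) {
    // The entry may have been dropped; marking is expected to abort and
    // restart rather than silently lose a grey object.
    handle_overflow();
  }
}
#endif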
void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}
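
// Illustrative sketch (not part of the VM sources): draining the stack in
// chunks with par_pop_arr() above, which transfers up to `max` entries under
// ParGCRareEvent_lock and reports the count through the out-parameter.
// process_oop() is a hypothetical per-object action.
#if 0
static void par_pop_arr_usage_sketch(CMMarkStack* stack) {
  oop buffer[64];
  int n;
  while (stack->par_pop_arr(buffer, 64, &n)) {
    for (int i = 0; i < n; i++) {
      process_oop(buffer[i]);
    }
  }
}
#endif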
void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}
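
// Illustrative sketch (not part of the VM sources): the shape of the
// double-checked claim in claim_next() above. The first read of
// _next_survivor is an optimistic, unlocked fast path; once the lock is
// held the field has to be re-read, because another worker may have
// claimed that region (or emptied the list) while we were blocked.
#if 0
static HeapRegion* double_checked_claim_sketch(HeapRegion* volatile* next,
                                               Monitor* lock) {
  HeapRegion* res = *next;                  // optimistic, unlocked read
  if (res != NULL) {
    MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
    res = *next;                            // re-read under the lock
    if (res != NULL) {
      *next = NULL;                         // claim (single-region variant)
    }
  }
  return res;
}
#endif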
void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
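
// For example, scale_parallel_threads() asks for roughly one marking thread
// per four parallel GC threads, but never fewer than one:
//   n_par_threads =  1  ->  (1 + 2) / 4 = 0, clamped to 1
//   n_par_threads =  6  ->  (6 + 2) / 4 = 2
//   n_par_threads = 13  ->  (13 + 2) / 4 = 3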
ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = (uint) ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (uint) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}
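
// Worked example for the G1MarkingOverheadPercent branch above (assumed
// flag values, not the defaults of any particular build): with
// G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
// GCPauseIntervalMillis = 1000 and 8 processors:
//   marking_overhead      = 10 / 100.0         = 0.1
//   overall_cm_overhead   = 200 * 0.1 / 1000   = 0.02
//   cpu_ratio             = 1 / 8              = 0.125
//   marking_thread_num    = ceil(0.02 / 0.125) = 1
//   marking_task_overhead = 0.02 / 1 * 8       = 0.16
//   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
// i.e. a single marking thread that sleeps 5.25x as long as it runs,
// keeping marking at 16% of one CPU (2% of the whole machine).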
void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}
class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that it gets suspended for a Full GC or that an
 * evacuation pause occurs. This is actually safe, since entering the
 * sync barrier is one of the last things do_marking_step() does, and
 * it doesn't manipulate any data structures afterwards.
 */
void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT
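
// Worked trace (develop builds only): with G1ConcMarkForceOverflow = 2,
// init() decrements the counter to 1 and arms _force, so the first
// should_force() check returns true; the update() at the first sync
// barrier decrements the counter to 0 and re-arms, yielding a second
// forced overflow; after that every should_force() returns false. In
// general should_force() fires exactly G1ConcMarkForceOverflow times
// per cycle.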
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}
void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (parallel_marking_threads() > 0) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};
If the region is "starts humongous" it will also set 1205 // to 1 the bits on the region bitmap that correspond to its 1206 // associated "continues humongous" regions. 1207 void set_bit_for_region(HeapRegion* hr) { 1208 assert(!hr->continuesHumongous(), "should have filtered those out"); 1209 1210 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1211 if (!hr->startsHumongous()) { 1212 // Normal (non-humongous) case: just set the bit. 1213 _region_bm->par_at_put(index, true); 1214 } else { 1215 // Starts humongous case: calculate how many regions are part of 1216 // this humongous region and then set the bit range. 1217 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index(); 1218 _region_bm->par_at_put_range(index, end_index, true); 1219 } 1220 } 1221 1222 public: 1223 CMCountDataClosureBase(G1CollectedHeap* g1h, 1224 BitMap* region_bm, BitMap* card_bm): 1225 _g1h(g1h), _cm(g1h->concurrent_mark()), 1226 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 1227 _region_bm(region_bm), _card_bm(card_bm) { } 1228 }; 1229 1230 // Closure that calculates the # live objects per region. Used 1231 // for verification purposes during the cleanup pause. 1232 class CalcLiveObjectsClosure: public CMCountDataClosureBase { 1233 CMBitMapRO* _bm; 1234 size_t _region_marked_bytes; 1235 1236 public: 1237 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h, 1238 BitMap* region_bm, BitMap* card_bm) : 1239 CMCountDataClosureBase(g1h, region_bm, card_bm), 1240 _bm(bm), _region_marked_bytes(0) { } 1241 1242 bool doHeapRegion(HeapRegion* hr) { 1243 1244 if (hr->continuesHumongous()) { 1245 // We will ignore these here and process them when their 1246 // associated "starts humongous" region is processed (see 1247 // set_bit_for_heap_region()). Note that we cannot rely on their 1248 // associated "starts humongous" region to have their bit set to 1249 // 1 since, due to the region chunking in the parallel region 1250 // iteration, a "continues humongous" region might be visited 1251 // before its associated "starts humongous". 1252 return false; 1253 } 1254 1255 HeapWord* ntams = hr->next_top_at_mark_start(); 1256 HeapWord* start = hr->bottom(); 1257 1258 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(), 1259 err_msg("Preconditions not met - " 1260 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT, 1261 start, ntams, hr->end())); 1262 1263 // Find the first marked object at or after "start". 1264 start = _bm->getNextMarkedWordAddress(start, ntams); 1265 1266 size_t marked_bytes = 0; 1267 1268 while (start < ntams) { 1269 oop obj = oop(start); 1270 int obj_sz = obj->size(); 1271 HeapWord* obj_end = start + obj_sz; 1272 1273 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 1274 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end); 1275 1276 // Note: if we're looking at the last region in heap - obj_end 1277 // could be actually just beyond the end of the heap; end_idx 1278 // will then correspond to a (non-existent) card that is also 1279 // just beyond the heap. 1280 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) { 1281 // end of object is not card aligned - increment to cover 1282 // all the cards spanned by the object 1283 end_idx += 1; 1284 } 1285 1286 // Set the bits in the card BM for the cards spanned by this object. 1287 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1288 1289 // Add the size of this object to the number of marked bytes. 

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};
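
// Illustrative sketch (not part of the VM sources): the card-index
// arithmetic used in doHeapRegion() above, assuming 512-byte cards
// (card_shift == 9); the real card_bitmap_index_for() additionally
// subtracts the bias _heap_bottom_card_num computed in the constructor.
// An object spanning [0x1000, 0x1310) starts on card 8 and ends inside
// card 9; since 0x1310 is not card aligned, end_idx is bumped so the
// half-open range [start_idx, end_idx) covers that last card too.
#if 0
static void card_range_sketch() {
  const int card_shift = 9;                       // assumed: 512-byte cards
  uintptr_t obj_start = 0x1000, obj_end = 0x1310;
  size_t start_idx = obj_start >> card_shift;     // 0x1000 >> 9 == 8
  size_t end_idx   = obj_end   >> card_shift;     // 0x1310 >> 9 == 9
  if ((obj_end & ((1u << card_shift) - 1)) != 0) {
    end_idx += 1;                                 // cover the partial last card
  }
  assert(start_idx == 8 && end_idx == 10, "cards 8 and 9 get their bits set");
}
#endif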
// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};
class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
              "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};
// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     end_idx, _card_bm->size()));
      assert(start_idx < _card_bm->size(),
             err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     start_idx, _card_bm->size()));

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
1613 if (G1CollectedHeap::use_parallel_gc_threads()) { 1614 assert( _g1h->workers()->active_workers() > 0, 1615 "Should have been previously set"); 1616 _n_workers = _g1h->workers()->active_workers(); 1617 } else { 1618 _n_workers = 1; 1619 } 1620 } 1621 1622 void work(uint worker_id) { 1623 assert(worker_id < _n_workers, "invariant"); 1624 1625 FinalCountDataUpdateClosure final_update_cl(_g1h, 1626 _actual_region_bm, 1627 _actual_card_bm); 1628 1629 if (G1CollectedHeap::use_parallel_gc_threads()) { 1630 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1631 worker_id, 1632 _n_workers, 1633 HeapRegion::FinalCountClaimValue); 1634 } else { 1635 _g1h->heap_region_iterate(&final_update_cl); 1636 } 1637 } 1638 }; 1639 1640 class G1ParNoteEndTask; 1641 1642 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1643 G1CollectedHeap* _g1; 1644 int _worker_num; 1645 size_t _max_live_bytes; 1646 uint _regions_claimed; 1647 size_t _freed_bytes; 1648 FreeRegionList* _local_cleanup_list; 1649 OldRegionSet* _old_proxy_set; 1650 HumongousRegionSet* _humongous_proxy_set; 1651 HRRSCleanupTask* _hrrs_cleanup_task; 1652 double _claimed_region_time; 1653 double _max_region_time; 1654 1655 public: 1656 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1657 int worker_num, 1658 FreeRegionList* local_cleanup_list, 1659 OldRegionSet* old_proxy_set, 1660 HumongousRegionSet* humongous_proxy_set, 1661 HRRSCleanupTask* hrrs_cleanup_task) : 1662 _g1(g1), _worker_num(worker_num), 1663 _max_live_bytes(0), _regions_claimed(0), 1664 _freed_bytes(0), 1665 _claimed_region_time(0.0), _max_region_time(0.0), 1666 _local_cleanup_list(local_cleanup_list), 1667 _old_proxy_set(old_proxy_set), 1668 _humongous_proxy_set(humongous_proxy_set), 1669 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1670 1671 size_t freed_bytes() { return _freed_bytes; } 1672 1673 bool doHeapRegion(HeapRegion *hr) { 1674 if (hr->continuesHumongous()) { 1675 return false; 1676 } 1677 // We use a claim value of zero here because all regions 1678 // were claimed with value 1 in the FinalCount task. 
1679     _g1->reset_gc_time_stamps(hr);
1680     double start = os::elapsedTime();
1681     _regions_claimed++;
1682     hr->note_end_of_marking();
1683     _max_live_bytes += hr->max_live_bytes();
1684     _g1->free_region_if_empty(hr,
1685                               &_freed_bytes,
1686                               _local_cleanup_list,
1687                               _old_proxy_set,
1688                               _humongous_proxy_set,
1689                               _hrrs_cleanup_task,
1690                               true /* par */);
1691     double region_time = (os::elapsedTime() - start);
1692     _claimed_region_time += region_time;
1693     if (region_time > _max_region_time) {
1694       _max_region_time = region_time;
1695     }
1696     return false;
1697   }
1698
1699   size_t max_live_bytes() { return _max_live_bytes; }
1700   uint regions_claimed() { return _regions_claimed; }
1701   double claimed_region_time_sec() { return _claimed_region_time; }
1702   double max_region_time_sec() { return _max_region_time; }
1703 };
1704
1705 class G1ParNoteEndTask: public AbstractGangTask {
1706   friend class G1NoteEndOfConcMarkClosure;
1707
1708 protected:
1709   G1CollectedHeap* _g1h;
1710   size_t _max_live_bytes;
1711   size_t _freed_bytes;
1712   FreeRegionList* _cleanup_list;
1713
1714 public:
1715   G1ParNoteEndTask(G1CollectedHeap* g1h,
1716                    FreeRegionList* cleanup_list) :
1717     AbstractGangTask("G1 note end"), _g1h(g1h),
1718     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1719
1720   void work(uint worker_id) {
1721     double start = os::elapsedTime();
1722     FreeRegionList local_cleanup_list("Local Cleanup List");
1723     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1724     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1725     HRRSCleanupTask hrrs_cleanup_task;
1726     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1727                                            &old_proxy_set,
1728                                            &humongous_proxy_set,
1729                                            &hrrs_cleanup_task);
1730     if (G1CollectedHeap::use_parallel_gc_threads()) {
1731       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1732                                             _g1h->workers()->active_workers(),
1733                                             HeapRegion::NoteEndClaimValue);
1734     } else {
1735       _g1h->heap_region_iterate(&g1_note_end);
1736     }
1737     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1738
1739     // Now update the lists
1740     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1741                                             NULL /* free_list */,
1742                                             &old_proxy_set,
1743                                             &humongous_proxy_set,
1744                                             true /* par */);
1745     {
1746       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1747       _max_live_bytes += g1_note_end.max_live_bytes();
1748       _freed_bytes += g1_note_end.freed_bytes();
1749
1750       // If we iterated over the global cleanup list at the end of
1751       // cleanup to do this printing, we could not guarantee that we
1752       // only generate output for the newly-reclaimed regions (the list
1753       // might not be empty at the beginning of cleanup; we might
1754       // still be working on its previous contents). So we do the
1755       // printing here, before we append the new regions to the global
1756       // cleanup list.
1757 1758 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1759 if (hr_printer->is_active()) { 1760 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1761 while (iter.more_available()) { 1762 HeapRegion* hr = iter.get_next(); 1763 hr_printer->cleanup(hr); 1764 } 1765 } 1766 1767 _cleanup_list->add_as_tail(&local_cleanup_list); 1768 assert(local_cleanup_list.is_empty(), "post-condition"); 1769 1770 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1771 } 1772 } 1773 size_t max_live_bytes() { return _max_live_bytes; } 1774 size_t freed_bytes() { return _freed_bytes; } 1775 }; 1776 1777 class G1ParScrubRemSetTask: public AbstractGangTask { 1778 protected: 1779 G1RemSet* _g1rs; 1780 BitMap* _region_bm; 1781 BitMap* _card_bm; 1782 public: 1783 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1784 BitMap* region_bm, BitMap* card_bm) : 1785 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1786 _region_bm(region_bm), _card_bm(card_bm) { } 1787 1788 void work(uint worker_id) { 1789 if (G1CollectedHeap::use_parallel_gc_threads()) { 1790 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1791 HeapRegion::ScrubRemSetClaimValue); 1792 } else { 1793 _g1rs->scrub(_region_bm, _card_bm); 1794 } 1795 } 1796 1797 }; 1798 1799 void ConcurrentMark::cleanup() { 1800 // world is stopped at this checkpoint 1801 assert(SafepointSynchronize::is_at_safepoint(), 1802 "world should be stopped"); 1803 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1804 1805 // If a full collection has happened, we shouldn't do this. 1806 if (has_aborted()) { 1807 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1808 return; 1809 } 1810 1811 HRSPhaseSetter x(HRSPhaseCleanup); 1812 g1h->verify_region_sets_optional(); 1813 1814 if (VerifyDuringGC) { 1815 HandleMark hm; // handle scope 1816 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1817 Universe::heap()->prepare_for_verify(); 1818 Universe::verify(/* silent */ false, 1819 /* option */ VerifyOption_G1UsePrevMarking); 1820 } 1821 1822 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1823 g1p->record_concurrent_mark_cleanup_start(); 1824 1825 double start = os::elapsedTime(); 1826 1827 HeapRegionRemSet::reset_for_cleanup_tasks(); 1828 1829 uint n_workers; 1830 1831 // Do counting once more with the world stopped for good measure. 1832 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1833 1834 if (G1CollectedHeap::use_parallel_gc_threads()) { 1835 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1836 "sanity check"); 1837 1838 g1h->set_par_threads(); 1839 n_workers = g1h->n_par_threads(); 1840 assert(g1h->n_par_threads() == n_workers, 1841 "Should not have been reset"); 1842 g1h->workers()->run_task(&g1_par_count_task); 1843 // Done with the parallel phase so reset to 0. 1844 g1h->set_par_threads(0); 1845 1846 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1847 "sanity check"); 1848 } else { 1849 n_workers = 1; 1850 g1_par_count_task.work(0); 1851 } 1852 1853 if (VerifyDuringGC) { 1854 // Verify that the counting data accumulated during marking matches 1855 // that calculated by walking the marking bitmap. 
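    // Each discrepancy is flagged by VerifyLiveObjectDataHRClosure and
    // accumulated into g1_par_verify_task.failures(), which the
    // guarantee below requires to be zero.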
1856
1857     // Bitmaps to hold expected values
1858     BitMap expected_region_bm(_region_bm.size(), false);
1859     BitMap expected_card_bm(_card_bm.size(), false);
1860
1861     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1862                                                  &_region_bm,
1863                                                  &_card_bm,
1864                                                  &expected_region_bm,
1865                                                  &expected_card_bm);
1866
1867     if (G1CollectedHeap::use_parallel_gc_threads()) {
1868       g1h->set_par_threads((int)n_workers);
1869       g1h->workers()->run_task(&g1_par_verify_task);
1870       // Done with the parallel phase so reset to 0.
1871       g1h->set_par_threads(0);
1872
1873       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1874              "sanity check");
1875     } else {
1876       g1_par_verify_task.work(0);
1877     }
1878
1879     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1880   }
1881
1882   size_t start_used_bytes = g1h->used();
1883   g1h->set_marking_complete();
1884
1885   double count_end = os::elapsedTime();
1886   double this_final_counting_time = (count_end - start);
1887   _total_counting_time += this_final_counting_time;
1888
1889   if (G1PrintRegionLivenessInfo) {
1890     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1891     _g1h->heap_region_iterate(&cl);
1892   }
1893
1894   // Install the newly created mark bitmap as "prev".
1895   swapMarkBitMaps();
1896
1897   g1h->reset_gc_time_stamp();
1898
1899   // Note end of marking in all heap regions.
1900   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1901   if (G1CollectedHeap::use_parallel_gc_threads()) {
1902     g1h->set_par_threads((int)n_workers);
1903     g1h->workers()->run_task(&g1_par_note_end_task);
1904     g1h->set_par_threads(0);
1905
1906     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1907            "sanity check");
1908   } else {
1909     g1_par_note_end_task.work(0);
1910   }
1911   g1h->check_gc_time_stamps();
1912
1913   if (!cleanup_list_is_empty()) {
1914     // The cleanup list is not empty, so we'll have to process it
1915     // concurrently. Notify anyone else that might be wanting free
1916     // regions that there will be more free regions coming soon.
1917     g1h->set_free_regions_coming();
1918   }
1919
1920   // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
1921   // call below, since it affects the metric by which we sort the heap regions.
1922   if (G1ScrubRemSets) {
1923     double rs_scrub_start = os::elapsedTime();
1924     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1925     if (G1CollectedHeap::use_parallel_gc_threads()) {
1926       g1h->set_par_threads((int)n_workers);
1927       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1928       g1h->set_par_threads(0);
1929
1930       assert(g1h->check_heap_region_claim_values(
1931                                             HeapRegion::ScrubRemSetClaimValue),
1932              "sanity check");
1933     } else {
1934       g1_par_scrub_rs_task.work(0);
1935     }
1936
1937     double rs_scrub_end = os::elapsedTime();
1938     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1939     _total_rs_scrub_time += this_rs_scrub_time;
1940   }
1941
1942   // this will also free any regions totally full of garbage objects,
1943   // and sort the regions.
1944   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1945
1946   // Statistics.
1947   double end = os::elapsedTime();
1948   _cleanup_times.add((end - start) * 1000.0);
1949
1950   if (G1Log::fine()) {
1951     g1h->print_size_transition(gclog_or_tty,
1952                                start_used_bytes,
1953                                g1h->used(),
1954                                g1h->capacity());
1955   }
1956
1957   // Clean up will have freed any regions completely full of garbage.
1958   // Update the soft reference policy with the new heap occupancy.
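  // (Universe::update_heap_info_at_gc() snapshots the post-cleanup
  // occupancy that the SoftReference clearing policy bases its
  // decisions on.)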
1959   Universe::update_heap_info_at_gc();
1960
1961   // We need to make this a "collection" so any collection pause that
1962   // races with it goes around and waits for completeCleanup to finish.
1963   g1h->increment_total_collections();
1964
1965   // We reclaimed old regions so we should calculate the sizes to make
1966   // sure we update the old gen/space data.
1967   g1h->g1mm()->update_sizes();
1968
1969   if (VerifyDuringGC) {
1970     HandleMark hm;  // handle scope
1971     gclog_or_tty->print(" VerifyDuringGC:(after)");
1972     Universe::heap()->prepare_for_verify();
1973     Universe::verify(/* silent */ false,
1974                      /* option */ VerifyOption_G1UsePrevMarking);
1975   }
1976
1977   g1h->verify_region_sets_optional();
1978   g1h->trace_heap_after_concurrent_cycle();
1979 }
1980
1981 void ConcurrentMark::completeCleanup() {
1982   if (has_aborted()) return;
1983
1984   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1985
1986   _cleanup_list.verify_optional();
1987   FreeRegionList tmp_free_list("Tmp Free List");
1988
1989   if (G1ConcRegionFreeingVerbose) {
1990     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1991                            "cleanup list has %u entries",
1992                            _cleanup_list.length());
1993   }
1994
1995   // No one else should be accessing the _cleanup_list at this point,
1996   // so it's not necessary to take any locks
1997   while (!_cleanup_list.is_empty()) {
1998     HeapRegion* hr = _cleanup_list.remove_head();
1999     assert(hr != NULL, "the list was not empty");
2000     hr->par_clear();
2001     tmp_free_list.add_as_tail(hr);
2002
2003     // Instead of adding one region at a time to the secondary_free_list,
2004     // we accumulate them in the local list and move them a few at a
2005     // time. This also cuts down on the number of notify_all() calls
2006     // we do during this process. We'll also append the local list when
2007     // _cleanup_list is empty (which means we just removed the last
2008     // region from the _cleanup_list).
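    // As a sketch of the batching (G1SecondaryFreeListAppendLength is
    // a product flag, typically 5): a 17-entry cleanup list is handed
    // over in batches of 5, 5, 5 and a final 2, so the
    // SecondaryFreeList_lock is taken and notify_all() issued four
    // times rather than seventeen.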
2009     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2010         _cleanup_list.is_empty()) {
2011       if (G1ConcRegionFreeingVerbose) {
2012         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2013                                "appending %u entries to the secondary_free_list, "
2014                                "cleanup list still has %u entries",
2015                                tmp_free_list.length(),
2016                                _cleanup_list.length());
2017       }
2018
2019       {
2020         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2021         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2022         SecondaryFreeList_lock->notify_all();
2023       }
2024
2025       if (G1StressConcRegionFreeing) {
2026         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2027           os::sleep(Thread::current(), (jlong) 1, false);
2028         }
2029       }
2030     }
2031   }
2032   assert(tmp_free_list.is_empty(), "post-condition");
2033 }
2034
2035 // Support closures for reference processing in G1
2036
2037 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2038   HeapWord* addr = (HeapWord*)obj;
2039   return addr != NULL &&
2040          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2041 }
2042
2043 class G1CMKeepAliveClosure: public OopClosure {
2044   G1CollectedHeap* _g1;
2045   ConcurrentMark* _cm;
2046 public:
2047   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2048     _g1(g1), _cm(cm) {
2049     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2050   }
2051
2052   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2053   virtual void do_oop( oop* p) { do_oop_work(p); }
2054
2055   template <class T> void do_oop_work(T* p) {
2056     oop obj = oopDesc::load_decode_heap_oop(p);
2057     HeapWord* addr = (HeapWord*)obj;
2058
2059     if (_cm->verbose_high()) {
2060       gclog_or_tty->print_cr("\t[0] we're looking at location "
2061                              "*"PTR_FORMAT" = "PTR_FORMAT,
2062                              p, (void*) obj);
2063     }
2064
2065     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2066       _cm->mark_and_count(obj);
2067       _cm->mark_stack_push(obj);
2068     }
2069   }
2070 };
2071
2072 class G1CMDrainMarkingStackClosure: public VoidClosure {
2073   ConcurrentMark* _cm;
2074   CMMarkStack* _markStack;
2075   G1CMKeepAliveClosure* _oopClosure;
2076 public:
2077   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2078                                G1CMKeepAliveClosure* oopClosure) :
2079     _cm(cm),
2080     _markStack(markStack),
2081     _oopClosure(oopClosure) { }
2082
2083   void do_void() {
2084     _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2085   }
2086 };
2087
2088 // 'Keep Alive' closure used by parallel reference processing.
2089 // An instance of this closure is used in the parallel reference processing
2090 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2091 // G1CMKeepAliveClosure, as it is MT-safe, and reference objects are only
2092 // placed on a discovered ref list once, so we could mark and push without
2093 // needing to check whether the object has already been marked. Using
2094 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2095 // operate on the global mark stack. This means that an individual
2096 // worker would be doing lock-free pushes while it processes its own
2097 // discovered ref list, followed by a drain call. If the discovered ref lists
2098 // are unbalanced then this could cause interference with the other
2099 // workers. Using a CMTask (and its embedded local data structures)
2100 // avoids that potential interference.
2101 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2102   ConcurrentMark* _cm;
2103   CMTask* _task;
2104   int _ref_counter_limit;
2105   int _ref_counter;
2106 public:
2107   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2108     _cm(cm), _task(task),
2109     _ref_counter_limit(G1RefProcDrainInterval) {
2110     assert(_ref_counter_limit > 0, "sanity");
2111     _ref_counter = _ref_counter_limit;
2112   }
2113
2114   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2115   virtual void do_oop( oop* p) { do_oop_work(p); }
2116
2117   template <class T> void do_oop_work(T* p) {
2118     if (!_cm->has_overflown()) {
2119       oop obj = oopDesc::load_decode_heap_oop(p);
2120       if (_cm->verbose_high()) {
2121         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2122                                "*"PTR_FORMAT" = "PTR_FORMAT,
2123                                _task->task_id(), p, (void*) obj);
2124       }
2125
2126       _task->deal_with_reference(obj);
2127       _ref_counter--;
2128
2129       if (_ref_counter == 0) {
2130         // We have dealt with _ref_counter_limit references, pushing them and objects
2131         // reachable from them on to the local stack (and possibly the global stack).
2132         // Call do_marking_step() to process these entries. We call the routine in a
2133         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2134         // with the entries that we've pushed as a result of the deal_with_reference
2135         // calls above) or we overflow.
2136         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2137         // while there may still be some work to do. (See the comment at the
2138         // beginning of CMTask::do_marking_step() for those conditions - one of which
2139         // is reaching the specified time target.) It is only when
2140         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2141         // that the marking has completed.
2142         do {
2143           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2144           _task->do_marking_step(mark_step_duration_ms,
2145                                  false /* do_stealing */,
2146                                  false /* do_termination */);
2147         } while (_task->has_aborted() && !_cm->has_overflown());
2148         _ref_counter = _ref_counter_limit;
2149       }
2150     } else {
2151       if (_cm->verbose_high()) {
2152         gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2153       }
2154     }
2155   }
2156 };
2157
2158 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2159   ConcurrentMark* _cm;
2160   CMTask* _task;
2161 public:
2162   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2163     _cm(cm), _task(task) { }
2164
2165   void do_void() {
2166     do {
2167       if (_cm->verbose_high()) {
2168         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2169                                _task->task_id());
2170       }
2171
2172       // We call CMTask::do_marking_step() to completely drain the local and
2173       // global marking stacks. The routine is called in a loop, which we'll
2174       // exit if there's nothing more to do (i.e. we've completely drained the
2175       // entries that were pushed as a result of applying the
2176       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2177       // lists above) or we overflow the global marking stack.
2178       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2179       // while there may still be some work to do. (See the comment at the
2180       // beginning of CMTask::do_marking_step() for those conditions - one of which
2181       // is reaching the specified time target.) It is only when
2182       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2183       // that the marking has completed.
2184 2185 _task->do_marking_step(1000000000.0 /* something very large */, 2186 true /* do_stealing */, 2187 true /* do_termination */); 2188 } while (_task->has_aborted() && !_cm->has_overflown()); 2189 } 2190 }; 2191 2192 // Implementation of AbstractRefProcTaskExecutor for parallel 2193 // reference processing at the end of G1 concurrent marking 2194 2195 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2196 private: 2197 G1CollectedHeap* _g1h; 2198 ConcurrentMark* _cm; 2199 WorkGang* _workers; 2200 int _active_workers; 2201 2202 public: 2203 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2204 ConcurrentMark* cm, 2205 WorkGang* workers, 2206 int n_workers) : 2207 _g1h(g1h), _cm(cm), 2208 _workers(workers), _active_workers(n_workers) { } 2209 2210 // Executes the given task using concurrent marking worker threads. 2211 virtual void execute(ProcessTask& task); 2212 virtual void execute(EnqueueTask& task); 2213 }; 2214 2215 class G1CMRefProcTaskProxy: public AbstractGangTask { 2216 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2217 ProcessTask& _proc_task; 2218 G1CollectedHeap* _g1h; 2219 ConcurrentMark* _cm; 2220 2221 public: 2222 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2223 G1CollectedHeap* g1h, 2224 ConcurrentMark* cm) : 2225 AbstractGangTask("Process reference objects in parallel"), 2226 _proc_task(proc_task), _g1h(g1h), _cm(cm) { } 2227 2228 virtual void work(uint worker_id) { 2229 CMTask* marking_task = _cm->task(worker_id); 2230 G1CMIsAliveClosure g1_is_alive(_g1h); 2231 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task); 2232 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2233 2234 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2235 } 2236 }; 2237 2238 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2239 assert(_workers != NULL, "Need parallel worker threads."); 2240 2241 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2242 2243 // We need to reset the phase for each task execution so that 2244 // the termination protocol of CMTask::do_marking_step works. 2245 _cm->set_phase(_active_workers, false /* concurrent */); 2246 _g1h->set_par_threads(_active_workers); 2247 _workers->run_task(&proc_task_proxy); 2248 _g1h->set_par_threads(0); 2249 } 2250 2251 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2252 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2253 EnqueueTask& _enq_task; 2254 2255 public: 2256 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2257 AbstractGangTask("Enqueue reference objects in parallel"), 2258 _enq_task(enq_task) { } 2259 2260 virtual void work(uint worker_id) { 2261 _enq_task.work(worker_id); 2262 } 2263 }; 2264 2265 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2266 assert(_workers != NULL, "Need parallel worker threads."); 2267 2268 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2269 2270 _g1h->set_par_threads(_active_workers); 2271 _workers->run_task(&enq_task_proxy); 2272 _g1h->set_par_threads(0); 2273 } 2274 2275 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2276 ResourceMark rm; 2277 HandleMark hm; 2278 2279 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2280 2281 // Is alive closure. 2282 G1CMIsAliveClosure g1_is_alive(g1h); 2283 2284 // Inner scope to exclude the cleaning of the string and symbol 2285 // tables from the displayed time. 
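  // The flow inside this scope: setup_policy() decides whether soft
  // references may be cleared, process_discovered_references() marks
  // everything reachable from the live referents (driving the
  // keep-alive and drain closures), and enqueue_discovered_references()
  // chains the surviving references onto the pending list.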
2286   {
2287     if (G1Log::finer()) {
2288       gclog_or_tty->put(' ');
2289     }
2290     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2291
2292     ReferenceProcessor* rp = g1h->ref_processor_cm();
2293
2294     // See the comment in G1CollectedHeap::ref_processing_init()
2295     // about how reference processing currently works in G1.
2296
2297     // Process weak references.
2298     rp->setup_policy(clear_all_soft_refs);
2299     assert(_markStack.isEmpty(), "mark stack should be empty");
2300
2301     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2302     G1CMDrainMarkingStackClosure
2303       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2304
2305     // We use the work gang from the G1CollectedHeap and we utilize all
2306     // the worker threads.
2307     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2308     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2309
2310     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2311                                               g1h->workers(), active_workers);
2312
2313     if (rp->processing_is_mt()) {
2314       // Set the degree of MT here. If the discovery is done MT, there
2315       // may have been a different number of threads doing the discovery
2316       // and a different number of discovered lists may have Ref objects.
2317       // That is OK as long as the Reference lists are balanced (see
2318       // balance_all_queues() and balance_queues()).
2319       rp->set_active_mt_degree(active_workers);
2320
2321       rp->process_discovered_references(&g1_is_alive,
2322                                         &g1_keep_alive,
2323                                         &g1_drain_mark_stack,
2324                                         &par_task_executor,
2325                                         g1h->gc_timer_cm());
2326
2327       // The work routines of the parallel keep_alive and drain_marking_stack
2328       // will set the has_overflown flag if we overflow the global marking
2329       // stack.
2330     } else {
2331       rp->process_discovered_references(&g1_is_alive,
2332                                         &g1_keep_alive,
2333                                         &g1_drain_mark_stack,
2334                                         NULL,
2335                                         g1h->gc_timer_cm());
2336     }
2337
2338     assert(_markStack.overflow() || _markStack.isEmpty(),
2339            "mark stack should be empty (unless it overflowed)");
2340     if (_markStack.overflow()) {
2341       // Should have been done already when we tried to push an
2342       // entry on to the global mark stack. But let's do it again.
2343       set_has_overflown();
2344     }
2345
2346     if (rp->processing_is_mt()) {
2347       assert(rp->num_q() == active_workers, "why not");
2348       rp->enqueue_discovered_references(&par_task_executor);
2349     } else {
2350       rp->enqueue_discovered_references();
2351     }
2352
2353     rp->verify_no_references_recorded();
2354     assert(!rp->discovery_enabled(), "Post condition");
2355   }
2356
2357   // Now clean up stale oops in StringTable
2358   StringTable::unlink(&g1_is_alive);
2359   // Clean up unreferenced symbols in symbol table.
2360   SymbolTable::unlink();
2361 }
2362
2363 void ConcurrentMark::swapMarkBitMaps() {
2364   CMBitMapRO* temp = _prevMarkBitMap;
2365   _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2366   _nextMarkBitMap = (CMBitMap*) temp;
2367 }
2368
2369 class CMRemarkTask: public AbstractGangTask {
2370 private:
2371   ConcurrentMark *_cm;
2372
2373 public:
2374   void work(uint worker_id) {
2375     // Since all available tasks are actually started, we should
2376     // only proceed if we're supposed to be active.
2377     if (worker_id < _cm->active_tasks()) {
2378       CMTask* task = _cm->task(worker_id);
2379       task->record_start_time();
2380       do {
2381         task->do_marking_step(1000000000.0 /* something very large */,
2382                               true /* do_stealing */,
2383                               true /* do_termination */);
2384       } while (task->has_aborted() && !_cm->has_overflown());
2385       // If we overflow, then we do not want to restart. We instead
2386       // want to abort remark and do concurrent marking again.
2387       task->record_end_time();
2388     }
2389   }
2390
2391   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2392     AbstractGangTask("Par Remark"), _cm(cm) {
2393     _cm->terminator()->reset_for_reuse(active_workers);
2394   }
2395 };
2396
2397 void ConcurrentMark::checkpointRootsFinalWork() {
2398   ResourceMark rm;
2399   HandleMark hm;
2400   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2401
2402   g1h->ensure_parsability(false);
2403
2404   if (G1CollectedHeap::use_parallel_gc_threads()) {
2405     G1CollectedHeap::StrongRootsScope srs(g1h);
2406     // this is remark, so we'll use up all active threads
2407     uint active_workers = g1h->workers()->active_workers();
2408     if (active_workers == 0) {
2409       assert(active_workers > 0, "Should have been set earlier");
2410       active_workers = (uint) ParallelGCThreads;
2411       g1h->workers()->set_active_workers(active_workers);
2412     }
2413     set_phase(active_workers, false /* concurrent */);
2414     // Leave _parallel_marking_threads at its
2415     // value originally calculated in the ConcurrentMark
2416     // constructor and pass values of the active workers
2417     // through the gang in the task.
2418
2419     CMRemarkTask remarkTask(this, active_workers);
2420     g1h->set_par_threads(active_workers);
2421     g1h->workers()->run_task(&remarkTask);
2422     g1h->set_par_threads(0);
2423   } else {
2424     G1CollectedHeap::StrongRootsScope srs(g1h);
2425     // this is remark, so we'll use up all available threads
2426     uint active_workers = 1;
2427     set_phase(active_workers, false /* concurrent */);
2428
2429     CMRemarkTask remarkTask(this, active_workers);
2430     // We will start all available threads, even if we decide that the
2431     // active_workers will be fewer. The extra ones will just bail out
2432     // immediately.
2433 remarkTask.work(0); 2434 } 2435 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2436 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2437 2438 print_stats(); 2439 2440 #if VERIFY_OBJS_PROCESSED 2441 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2442 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2443 _scan_obj_cl.objs_processed, 2444 ThreadLocalObjQueue::objs_enqueued); 2445 guarantee(_scan_obj_cl.objs_processed == 2446 ThreadLocalObjQueue::objs_enqueued, 2447 "Different number of objs processed and enqueued."); 2448 } 2449 #endif 2450 } 2451 2452 #ifndef PRODUCT 2453 2454 class PrintReachableOopClosure: public OopClosure { 2455 private: 2456 G1CollectedHeap* _g1h; 2457 outputStream* _out; 2458 VerifyOption _vo; 2459 bool _all; 2460 2461 public: 2462 PrintReachableOopClosure(outputStream* out, 2463 VerifyOption vo, 2464 bool all) : 2465 _g1h(G1CollectedHeap::heap()), 2466 _out(out), _vo(vo), _all(all) { } 2467 2468 void do_oop(narrowOop* p) { do_oop_work(p); } 2469 void do_oop( oop* p) { do_oop_work(p); } 2470 2471 template <class T> void do_oop_work(T* p) { 2472 oop obj = oopDesc::load_decode_heap_oop(p); 2473 const char* str = NULL; 2474 const char* str2 = ""; 2475 2476 if (obj == NULL) { 2477 str = ""; 2478 } else if (!_g1h->is_in_g1_reserved(obj)) { 2479 str = " O"; 2480 } else { 2481 HeapRegion* hr = _g1h->heap_region_containing(obj); 2482 guarantee(hr != NULL, "invariant"); 2483 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2484 bool marked = _g1h->is_marked(obj, _vo); 2485 2486 if (over_tams) { 2487 str = " >"; 2488 if (marked) { 2489 str2 = " AND MARKED"; 2490 } 2491 } else if (marked) { 2492 str = " M"; 2493 } else { 2494 str = " NOT"; 2495 } 2496 } 2497 2498 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2499 p, (void*) obj, str, str2); 2500 } 2501 }; 2502 2503 class PrintReachableObjectClosure : public ObjectClosure { 2504 private: 2505 G1CollectedHeap* _g1h; 2506 outputStream* _out; 2507 VerifyOption _vo; 2508 bool _all; 2509 HeapRegion* _hr; 2510 2511 public: 2512 PrintReachableObjectClosure(outputStream* out, 2513 VerifyOption vo, 2514 bool all, 2515 HeapRegion* hr) : 2516 _g1h(G1CollectedHeap::heap()), 2517 _out(out), _vo(vo), _all(all), _hr(hr) { } 2518 2519 void do_object(oop o) { 2520 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2521 bool marked = _g1h->is_marked(o, _vo); 2522 bool print_it = _all || over_tams || marked; 2523 2524 if (print_it) { 2525 _out->print_cr(" "PTR_FORMAT"%s", 2526 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2527 PrintReachableOopClosure oopCl(_out, _vo, _all); 2528 o->oop_iterate(&oopCl); 2529 } 2530 } 2531 }; 2532 2533 class PrintReachableRegionClosure : public HeapRegionClosure { 2534 private: 2535 G1CollectedHeap* _g1h; 2536 outputStream* _out; 2537 VerifyOption _vo; 2538 bool _all; 2539 2540 public: 2541 bool doHeapRegion(HeapRegion* hr) { 2542 HeapWord* b = hr->bottom(); 2543 HeapWord* e = hr->end(); 2544 HeapWord* t = hr->top(); 2545 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2546 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2547 "TAMS: "PTR_FORMAT, b, e, t, p); 2548 _out->cr(); 2549 2550 HeapWord* from = b; 2551 HeapWord* to = t; 2552 2553 if (to > from) { 2554 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2555 _out->cr(); 2556 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2557 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2558 _out->cr(); 2559 } 2560 2561 return false; 2562 } 2563 2564 PrintReachableRegionClosure(outputStream* out, 2565 VerifyOption vo, 2566 bool all) : 2567 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2568 }; 2569 2570 void ConcurrentMark::print_reachable(const char* str, 2571 VerifyOption vo, 2572 bool all) { 2573 gclog_or_tty->cr(); 2574 gclog_or_tty->print_cr("== Doing heap dump... "); 2575 2576 if (G1PrintReachableBaseFile == NULL) { 2577 gclog_or_tty->print_cr(" #### error: no base file defined"); 2578 return; 2579 } 2580 2581 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2582 (JVM_MAXPATHLEN - 1)) { 2583 gclog_or_tty->print_cr(" #### error: file name too long"); 2584 return; 2585 } 2586 2587 char file_name[JVM_MAXPATHLEN]; 2588 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2589 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2590 2591 fileStream fout(file_name); 2592 if (!fout.is_open()) { 2593 gclog_or_tty->print_cr(" #### error: could not open file"); 2594 return; 2595 } 2596 2597 outputStream* out = &fout; 2598 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2599 out->cr(); 2600 2601 out->print_cr("--- ITERATING OVER REGIONS"); 2602 out->cr(); 2603 PrintReachableRegionClosure rcl(out, vo, all); 2604 _g1h->heap_region_iterate(&rcl); 2605 out->cr(); 2606 2607 gclog_or_tty->print_cr(" done"); 2608 gclog_or_tty->flush(); 2609 } 2610 2611 #endif // PRODUCT 2612 2613 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2614 // Note we are overriding the read-only view of the prev map here, via 2615 // the cast. 2616 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2617 } 2618 2619 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2620 _nextMarkBitMap->clearRange(mr); 2621 } 2622 2623 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2624 clearRangePrevBitmap(mr); 2625 clearRangeNextBitmap(mr); 2626 } 2627 2628 HeapRegion* 2629 ConcurrentMark::claim_region(int task_num) { 2630 // "checkpoint" the finger 2631 HeapWord* finger = _finger; 2632 2633 // _heap_end will not change underneath our feet; it only changes at 2634 // yield points. 2635 while (finger < _heap_end) { 2636 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2637 2638 // Note on how this code handles humongous regions. In the 2639 // normal case the finger will reach the start of a "starts 2640 // humongous" (SH) region. Its end will either be the end of the 2641 // last "continues humongous" (CH) region in the sequence, or the 2642 // standard end of the SH region (if the SH is the only region in 2643 // the sequence). 
That way claim_region() will skip over the CH
2644     // regions. However, there is a subtle race between a CM thread
2645     // executing this method and a mutator thread doing a humongous
2646     // object allocation. The two are not mutually exclusive as the CM
2647     // thread does not need to hold the Heap_lock when it gets
2648     // here. So there is a chance that claim_region() will come across
2649     // a free region that's in the process of becoming a SH or a CH
2650     // region. In the former case, it will either
2651     //   a) Miss the update to the region's end, in which case it will
2652     //      visit every subsequent CH region, will find their bitmaps
2653     //      empty, and do nothing, or
2654     //   b) Will observe the update of the region's end (in which case
2655     //      it will skip the subsequent CH regions).
2656     // If it comes across a region that suddenly becomes CH, the
2657     // scenario will be similar to b). So, the race between
2658     // claim_region() and a humongous object allocation might force us
2659     // to do a bit of unnecessary work (due to some unnecessary bitmap
2660     // iterations) but it should not introduce any correctness issues.
2661     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2662     HeapWord* bottom = curr_region->bottom();
2663     HeapWord* end = curr_region->end();
2664     HeapWord* limit = curr_region->next_top_at_mark_start();
2665
2666     if (verbose_low()) {
2667       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2668                              "["PTR_FORMAT", "PTR_FORMAT"), "
2669                              "limit = "PTR_FORMAT,
2670                              task_num, curr_region, bottom, end, limit);
2671     }
2672
2673     // Is the gap between reading the finger and doing the CAS too long?
2674     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2675     if (res == finger) {
2676       // we succeeded
2677
2678       // notice that _finger == end cannot be guaranteed here since
2679       // someone else might have moved the finger even further
2680       assert(_finger >= end, "the finger should have moved forward");
2681
2682       if (verbose_low()) {
2683         gclog_or_tty->print_cr("[%d] we were successful with region = "
2684                                PTR_FORMAT, task_num, curr_region);
2685       }
2686
2687       if (limit > bottom) {
2688         if (verbose_low()) {
2689           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2690                                  "returning it ", task_num, curr_region);
2691         }
2692         return curr_region;
2693       } else {
2694         assert(limit == bottom,
2695                "the region limit should be at bottom");
2696         if (verbose_low()) {
2697           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2698                                  "returning NULL", task_num, curr_region);
2699         }
2700         // we return NULL and the caller should try calling
2701         // claim_region() again.
2702 return NULL; 2703 } 2704 } else { 2705 assert(_finger > finger, "the finger should have moved forward"); 2706 if (verbose_low()) { 2707 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2708 "global finger = "PTR_FORMAT", " 2709 "our finger = "PTR_FORMAT, 2710 task_num, _finger, finger); 2711 } 2712 2713 // read it again 2714 finger = _finger; 2715 } 2716 } 2717 2718 return NULL; 2719 } 2720 2721 #ifndef PRODUCT 2722 enum VerifyNoCSetOopsPhase { 2723 VerifyNoCSetOopsStack, 2724 VerifyNoCSetOopsQueues, 2725 VerifyNoCSetOopsSATBCompleted, 2726 VerifyNoCSetOopsSATBThread 2727 }; 2728 2729 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2730 private: 2731 G1CollectedHeap* _g1h; 2732 VerifyNoCSetOopsPhase _phase; 2733 int _info; 2734 2735 const char* phase_str() { 2736 switch (_phase) { 2737 case VerifyNoCSetOopsStack: return "Stack"; 2738 case VerifyNoCSetOopsQueues: return "Queue"; 2739 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2740 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2741 default: ShouldNotReachHere(); 2742 } 2743 return NULL; 2744 } 2745 2746 void do_object_work(oop obj) { 2747 guarantee(!_g1h->obj_in_cs(obj), 2748 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2749 (void*) obj, phase_str(), _info)); 2750 } 2751 2752 public: 2753 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2754 2755 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2756 _phase = phase; 2757 _info = info; 2758 } 2759 2760 virtual void do_oop(oop* p) { 2761 oop obj = oopDesc::load_decode_heap_oop(p); 2762 do_object_work(obj); 2763 } 2764 2765 virtual void do_oop(narrowOop* p) { 2766 // We should not come across narrow oops while scanning marking 2767 // stacks and SATB buffers. 2768 ShouldNotReachHere(); 2769 } 2770 2771 virtual void do_object(oop obj) { 2772 do_object_work(obj); 2773 } 2774 }; 2775 2776 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2777 bool verify_enqueued_buffers, 2778 bool verify_thread_buffers, 2779 bool verify_fingers) { 2780 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2781 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2782 return; 2783 } 2784 2785 VerifyNoCSetOopsClosure cl; 2786 2787 if (verify_stacks) { 2788 // Verify entries on the global mark stack 2789 cl.set_phase(VerifyNoCSetOopsStack); 2790 _markStack.oops_do(&cl); 2791 2792 // Verify entries on the task queues 2793 for (int i = 0; i < (int) _max_task_num; i += 1) { 2794 cl.set_phase(VerifyNoCSetOopsQueues, i); 2795 OopTaskQueue* queue = _task_queues->queue(i); 2796 queue->oops_do(&cl); 2797 } 2798 } 2799 2800 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2801 2802 // Verify entries on the enqueued SATB buffers 2803 if (verify_enqueued_buffers) { 2804 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2805 satb_qs.iterate_completed_buffers_read_only(&cl); 2806 } 2807 2808 // Verify entries on the per-thread SATB buffers 2809 if (verify_thread_buffers) { 2810 cl.set_phase(VerifyNoCSetOopsSATBThread); 2811 satb_qs.iterate_thread_buffers_read_only(&cl); 2812 } 2813 2814 if (verify_fingers) { 2815 // Verify the global finger 2816 HeapWord* global_finger = finger(); 2817 if (global_finger != NULL && global_finger < _heap_end) { 2818 // The global finger always points to a heap region boundary. 
We 2819 // use heap_region_containing_raw() to get the containing region 2820 // given that the global finger could be pointing to a free region 2821 // which subsequently becomes continues humongous. If that 2822 // happens, heap_region_containing() will return the bottom of the 2823 // corresponding starts humongous region and the check below will 2824 // not hold any more. 2825 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2826 guarantee(global_finger == global_hr->bottom(), 2827 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2828 global_finger, HR_FORMAT_PARAMS(global_hr))); 2829 } 2830 2831 // Verify the task fingers 2832 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2833 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2834 CMTask* task = _tasks[i]; 2835 HeapWord* task_finger = task->finger(); 2836 if (task_finger != NULL && task_finger < _heap_end) { 2837 // See above note on the global finger verification. 2838 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2839 guarantee(task_finger == task_hr->bottom() || 2840 !task_hr->in_collection_set(), 2841 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2842 task_finger, HR_FORMAT_PARAMS(task_hr))); 2843 } 2844 } 2845 } 2846 } 2847 #endif // PRODUCT 2848 2849 void ConcurrentMark::clear_marking_state(bool clear_overflow) { 2850 _markStack.setEmpty(); 2851 _markStack.clear_overflow(); 2852 if (clear_overflow) { 2853 clear_has_overflown(); 2854 } else { 2855 assert(has_overflown(), "pre-condition"); 2856 } 2857 _finger = _heap_start; 2858 2859 for (int i = 0; i < (int)_max_task_num; ++i) { 2860 OopTaskQueue* queue = _task_queues->queue(i); 2861 queue->set_empty(); 2862 } 2863 } 2864 2865 // Aggregate the counting data that was constructed concurrently 2866 // with marking. 2867 class AggregateCountDataHRClosure: public HeapRegionClosure { 2868 G1CollectedHeap* _g1h; 2869 ConcurrentMark* _cm; 2870 CardTableModRefBS* _ct_bs; 2871 BitMap* _cm_card_bm; 2872 size_t _max_task_num; 2873 2874 public: 2875 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2876 BitMap* cm_card_bm, 2877 size_t max_task_num) : 2878 _g1h(g1h), _cm(g1h->concurrent_mark()), 2879 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 2880 _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { } 2881 2882 bool doHeapRegion(HeapRegion* hr) { 2883 if (hr->continuesHumongous()) { 2884 // We will ignore these here and process them when their 2885 // associated "starts humongous" region is processed. 2886 // Note that we cannot rely on their associated 2887 // "starts humongous" region to have their bit set to 1 2888 // since, due to the region chunking in the parallel region 2889 // iteration, a "continues humongous" region might be visited 2890 // before its associated "starts humongous". 2891 return false; 2892 } 2893 2894 HeapWord* start = hr->bottom(); 2895 HeapWord* limit = hr->next_top_at_mark_start(); 2896 HeapWord* end = hr->end(); 2897 2898 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2899 err_msg("Preconditions not met - " 2900 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2901 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2902 start, limit, hr->top(), hr->end())); 2903 2904 assert(hr->next_marked_bytes() == 0, "Precondition"); 2905 2906 if (start == limit) { 2907 // NTAMS of this region has not been set so nothing to do. 2908 return false; 2909 } 2910 2911 // 'start' should be in the heap. 
2912     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2913     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2914     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2915
2916     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2917     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2918     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2919
2920     // If ntams is not card aligned then we bump the card bitmap index
2921     // for limit so that we get all the cards spanned by
2922     // the object ending at ntams.
2923     // Note: if this is the last region in the heap then ntams
2924     // could actually be just beyond the end of the heap;
2925     // limit_idx will then correspond to a (non-existent) card
2926     // that is also outside the heap.
2927     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2928       limit_idx += 1;
2929     }
2930
2931     assert(limit_idx <= end_idx, "or else use atomics");
2932
2933     // Aggregate the "stripe" in the count data associated with hr.
2934     uint hrs_index = hr->hrs_index();
2935     size_t marked_bytes = 0;
2936
2937     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2938       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2939       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2940
2941       // Fetch the marked_bytes in this region for task i and
2942       // add it to the running total for this region.
2943       marked_bytes += marked_bytes_array[hrs_index];
2944
2945       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2946       // into the global card bitmap.
2947       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2948
2949       while (scan_idx < limit_idx) {
2950         assert(task_card_bm->at(scan_idx) == true, "should be");
2951         _cm_card_bm->set_bit(scan_idx);
2952         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2953
2954         // BitMap::get_next_one_offset() can handle the case when
2955         // its left_offset parameter is greater than its right_offset
2956         // parameter. It does, however, have an early exit if
2957         // left_offset == right_offset. So let's limit the value
2958         // passed in for left offset here.
2959         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2960         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2961       }
2962     }
2963
2964     // Update the marked bytes for this region.
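    // E.g. if tasks 0..2 recorded 4K, 0 and 12K live bytes for this
    // region, 16K is added to its marked bytes below, and the union
    // of those tasks' card bits has been set in the global card bitmap.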
2965 hr->add_to_marked_bytes(marked_bytes); 2966 2967 // Next heap region 2968 return false; 2969 } 2970 }; 2971 2972 class G1AggregateCountDataTask: public AbstractGangTask { 2973 protected: 2974 G1CollectedHeap* _g1h; 2975 ConcurrentMark* _cm; 2976 BitMap* _cm_card_bm; 2977 size_t _max_task_num; 2978 int _active_workers; 2979 2980 public: 2981 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2982 ConcurrentMark* cm, 2983 BitMap* cm_card_bm, 2984 size_t max_task_num, 2985 int n_workers) : 2986 AbstractGangTask("Count Aggregation"), 2987 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2988 _max_task_num(max_task_num), 2989 _active_workers(n_workers) { } 2990 2991 void work(uint worker_id) { 2992 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num); 2993 2994 if (G1CollectedHeap::use_parallel_gc_threads()) { 2995 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 2996 _active_workers, 2997 HeapRegion::AggregateCountClaimValue); 2998 } else { 2999 _g1h->heap_region_iterate(&cl); 3000 } 3001 } 3002 }; 3003 3004 3005 void ConcurrentMark::aggregate_count_data() { 3006 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3007 _g1h->workers()->active_workers() : 3008 1); 3009 3010 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3011 _max_task_num, n_workers); 3012 3013 if (G1CollectedHeap::use_parallel_gc_threads()) { 3014 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3015 "sanity check"); 3016 _g1h->set_par_threads(n_workers); 3017 _g1h->workers()->run_task(&g1_par_agg_task); 3018 _g1h->set_par_threads(0); 3019 3020 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3021 "sanity check"); 3022 _g1h->reset_heap_region_claim_values(); 3023 } else { 3024 g1_par_agg_task.work(0); 3025 } 3026 } 3027 3028 // Clear the per-worker arrays used to store the per-region counting data 3029 void ConcurrentMark::clear_all_count_data() { 3030 // Clear the global card bitmap - it will be filled during 3031 // liveness count aggregation (during remark) and the 3032 // final counting task. 3033 _card_bm.clear(); 3034 3035 // Clear the global region bitmap - it will be filled as part 3036 // of the final counting task. 
3037   _region_bm.clear();
3038
3039   uint max_regions = _g1h->max_regions();
3040   assert(_max_task_num != 0, "uninitialized");
3041
3042   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3043     BitMap* task_card_bm = count_card_bitmap_for(i);
3044     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3045
3046     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3047     assert(marked_bytes_array != NULL, "uninitialized");
3048
3049     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3050     task_card_bm->clear();
3051   }
3052 }
3053
3054 void ConcurrentMark::print_stats() {
3055   if (verbose_stats()) {
3056     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3057     for (size_t i = 0; i < _active_tasks; ++i) {
3058       _tasks[i]->print_stats();
3059       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3060     }
3061   }
3062 }
3063
3064 // abandon current marking iteration due to a Full GC
3065 void ConcurrentMark::abort() {
3066   // Clear all marks to force marking thread to do nothing
3067   _nextMarkBitMap->clearAll();
3068   // Clear the liveness counting data
3069   clear_all_count_data();
3070   // Empty mark stack
3071   clear_marking_state();
3072   for (int i = 0; i < (int)_max_task_num; ++i) {
3073     _tasks[i]->clear_region_fields();
3074   }
3075   _has_aborted = true;
3076
3077   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3078   satb_mq_set.abandon_partial_marking();
3079   // This can be called either during or outside marking, we'll read
3080   // the expected_active value from the SATB queue set.
3081   satb_mq_set.set_active_all_threads(
3082                                  false, /* new active value */
3083                                  satb_mq_set.is_active() /* expected_active */);
3084
3085   _g1h->trace_heap_after_concurrent_cycle();
3086   _g1h->register_concurrent_cycle_end();
3087 }
3088
3089 static void print_ms_time_info(const char* prefix, const char* name,
3090                                NumberSeq& ns) {
3091   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3092                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3093   if (ns.num() > 0) {
3094     gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3095                            prefix, ns.sd(), ns.maximum());
3096   }
3097 }
3098
3099 void ConcurrentMark::print_summary_info() {
3100   gclog_or_tty->print_cr(" Concurrent marking:");
3101   print_ms_time_info(" ", "init marks", _init_times);
3102   print_ms_time_info(" ", "remarks", _remark_times);
3103   {
3104     print_ms_time_info(" ", "final marks", _remark_mark_times);
3105     print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3106
3107   }
3108   print_ms_time_info(" ", "cleanups", _cleanup_times);
3109   gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3110                          _total_counting_time,
3111                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3112                           (double)_cleanup_times.num()
3113                          : 0.0));
3114   if (G1ScrubRemSets) {
3115     gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3116                            _total_rs_scrub_time,
3117                            (_cleanup_times.num() > 0 ?
_total_rs_scrub_time * 1000.0 / 3118 (double)_cleanup_times.num() 3119 : 0.0)); 3120 } 3121 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3122 (_init_times.sum() + _remark_times.sum() + 3123 _cleanup_times.sum())/1000.0); 3124 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3125 "(%8.2f s marking).", 3126 cmThread()->vtime_accum(), 3127 cmThread()->vtime_mark_accum()); 3128 } 3129 3130 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3131 _parallel_workers->print_worker_threads_on(st); 3132 } 3133 3134 // We take a break if someone is trying to stop the world. 3135 bool ConcurrentMark::do_yield_check(uint worker_id) { 3136 if (should_yield()) { 3137 if (worker_id == 0) { 3138 _g1h->g1_policy()->record_concurrent_pause(); 3139 } 3140 cmThread()->yield(); 3141 return true; 3142 } else { 3143 return false; 3144 } 3145 } 3146 3147 bool ConcurrentMark::should_yield() { 3148 return cmThread()->should_yield(); 3149 } 3150 3151 bool ConcurrentMark::containing_card_is_marked(void* p) { 3152 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3153 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3154 } 3155 3156 bool ConcurrentMark::containing_cards_are_marked(void* start, 3157 void* last) { 3158 return containing_card_is_marked(start) && 3159 containing_card_is_marked(last); 3160 } 3161 3162 #ifndef PRODUCT 3163 // for debugging purposes 3164 void ConcurrentMark::print_finger() { 3165 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3166 _heap_start, _heap_end, _finger); 3167 for (int i = 0; i < (int) _max_task_num; ++i) { 3168 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3169 } 3170 gclog_or_tty->print_cr(""); 3171 } 3172 #endif 3173 3174 void CMTask::scan_object(oop obj) { 3175 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3176 3177 if (_cm->verbose_high()) { 3178 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3179 _task_id, (void*) obj); 3180 } 3181 3182 size_t obj_size = obj->size(); 3183 _words_scanned += obj_size; 3184 3185 obj->oop_iterate(_cm_oop_closure); 3186 statsOnly( ++_objs_scanned ); 3187 check_limits(); 3188 } 3189 3190 // Closure for iteration over bitmaps 3191 class CMBitMapClosure : public BitMapClosure { 3192 private: 3193 // the bitmap that is being iterated over 3194 CMBitMap* _nextMarkBitMap; 3195 ConcurrentMark* _cm; 3196 CMTask* _task; 3197 3198 public: 3199 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3200 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3201 3202 bool do_bit(size_t offset) { 3203 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3204 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3205 assert( addr < _cm->finger(), "invariant"); 3206 3207 statsOnly( _task->increase_objs_found_on_bitmap() ); 3208 assert(addr >= _task->finger(), "invariant"); 3209 3210 // We move that task's local finger along. 3211 _task->move_finger_to(addr); 3212 3213 _task->scan_object(oop(addr)); 3214 // we only partially drain the local queue and global stack 3215 _task->drain_local_queue(true); 3216 _task->drain_global_stack(true); 3217 3218 // if the has_aborted flag has been raised, we need to bail out of 3219 // the iteration 3220 return !_task->has_aborted(); 3221 } 3222 }; 3223 3224 // Closure for iterating over objects, currently only used for 3225 // processing SATB buffers. 
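// Each object handed to do_object() below came out of an SATB buffer,
// i.e. the pre-write barrier recorded it as reachable when marking
// started, so it is simply passed on to CMTask::deal_with_reference().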
3226 class CMObjectClosure : public ObjectClosure {
3227 private:
3228   CMTask* _task;
3229
3230 public:
3231   void do_object(oop obj) {
3232     _task->deal_with_reference(obj);
3233   }
3234
3235   CMObjectClosure(CMTask* task) : _task(task) { }
3236 };
3237
3238 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3239                                ConcurrentMark* cm,
3240                                CMTask* task)
3241   : _g1h(g1h), _cm(cm), _task(task) {
3242   assert(_ref_processor == NULL, "should be initialized to NULL");
3243
3244   if (G1UseConcMarkReferenceProcessing) {
3245     _ref_processor = g1h->ref_processor_cm();
3246     assert(_ref_processor != NULL, "should not be NULL");
3247   }
3248 }
3249
3250 void CMTask::setup_for_region(HeapRegion* hr) {
3251   // Separated the asserts so that we know which one fires.
3252   assert(hr != NULL,
3253          "claim_region() should have filtered out continues humongous regions");
3254   assert(!hr->continuesHumongous(),
3255          "claim_region() should have filtered out continues humongous regions");
3256
3257   if (_cm->verbose_low()) {
3258     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3259                            _task_id, hr);
3260   }
3261
3262   _curr_region = hr;
3263   _finger = hr->bottom();
3264   update_region_limit();
3265 }
3266
3267 void CMTask::update_region_limit() {
3268   HeapRegion* hr = _curr_region;
3269   HeapWord* bottom = hr->bottom();
3270   HeapWord* limit = hr->next_top_at_mark_start();
3271
3272   if (limit == bottom) {
3273     if (_cm->verbose_low()) {
3274       gclog_or_tty->print_cr("[%d] found an empty region "
3275                              "["PTR_FORMAT", "PTR_FORMAT")",
3276                              _task_id, bottom, limit);
3277     }
3278     // The region was collected underneath our feet.
3279     // We set the finger to bottom to ensure that the bitmap
3280     // iteration that will follow this will not do anything.
3281     // (this is not a condition that holds when we set the region up,
3282     // as the region is not supposed to be empty in the first place)
3283     _finger = bottom;
3284   } else if (limit >= _region_limit) {
3285     assert(limit >= _finger, "peace of mind");
3286   } else {
3287     assert(limit < _region_limit, "only way to get here");
3288     // This can happen under some pretty unusual circumstances. An
3289     // evacuation pause empties the region underneath our feet (NTAMS
3290     // at bottom). We then do some allocation in the region (NTAMS
3291     // stays at bottom), followed by the region being used as a GC
3292     // alloc region (NTAMS will move to top() and the objects
3293     // originally below it will be grayed). All objects now marked in
3294     // the region are explicitly grayed, if below the global finger,
3295     // and in fact we do not need to scan anything else. So, we simply
3296     // set _finger to be limit to ensure that the bitmap iteration
3297     // doesn't do anything.
3298     _finger = limit;
3299   }
3300
3301   _region_limit = limit;
3302 }
3303
3304 void CMTask::giveup_current_region() {
3305   assert(_curr_region != NULL, "invariant");
3306   if (_cm->verbose_low()) {
3307     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3308                            _task_id, _curr_region);
3309   }
3310   clear_region_fields();
3311 }
3312
3313 void CMTask::clear_region_fields() {
3314   // Values for these three fields that indicate that we're not
3315   // holding on to a region.
  _curr_region  = NULL;
  _finger       = NULL;
  _region_limit = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] resetting", _task_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes           = 0;
  _local_pops             = 0;
  _local_max_size         = 0;
  _objs_scanned           = 0;
  _global_pushes          = 0;
  _global_pops            = 0;
  _global_max_size        = 0;
  _global_transfers_to    = 0;
  _global_transfers_from  = 0;
  _regions_claimed        = 0;
  _objs_found_on_bitmap   = 0;
  _satb_buffers_processed = 0;
  _steal_attempts         = 0;
  _steals                 = 0;
  _aborted                = 0;
  _aborted_overflow       = 0;
  _aborted_cm_aborted     = 0;
  _aborted_yield          = 0;
  _aborted_timed_out      = 0;
  _aborted_satb           = 0;
  _aborted_termination    = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following:

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    }
    // we need to process SATB buffers, so we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
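  // A worked example with invented numbers: if words_scanned_period were
  // 10000 and _real_words_scanned_limit were 50000 (i.e. 40000 words
  // scanned so far), the assignment below pulls the limit back to 42500,
  // so the next regular_clock_call() fires after only ~2500 more words --
  // a quarter of the usual period -- instead of the full 10000.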

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
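  // For instance (illustrative numbers only): with a 16k-entry task queue
  // the first operand of the MIN2 below would be ~5461, and whichever of
  // that and GCDrainStackTargetSize is smaller becomes the target, so a
  // partial drain shrinks the queue to a short stub rather than emptying
  // it, leaving entries available for other tasks to steal.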
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
                             _task_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue set makes several assumptions about whether to call the
// par or non-par versions of its methods. This is why some of the code
// below is replicated. We should really get rid of the single-threaded
// version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
  gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb,
                         _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms) method is the building block
    of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it also needs to be
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work will a task totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and more fine-grained
      interaction with it might cause contention. If it
      overflows, then the marking phase should restart and iterate
      over the bitmap to identify gray objects. Throughout the marking
      phase, tasks attempt to keep the global mark stack at a small
      length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work will
      tasks totally drain the global mark stack.

      (4) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
      (local queues, stacks, fingers etc.)
      are re-initialised so that when do_marking_step() completes,
      the marking phase can immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield then we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle, as by yielding we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reaches a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.

*****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
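  // Illustrative only -- a hypothetical driver matching the block comment
  // above, which says this method should be called in a loop until it
  // completes without aborting:
  //
  //   do {
  //     task->do_marking_step(target_ms,
  //                           true /* do_stealing */,
  //                           true /* do_termination */);
  //     // yield / handle overflow restarts between steps as needed
  //   } while (task->has_aborted() && !cm->has_aborted());
  //
  // The _claimed flag below is set and cleared once per such step.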
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);
      }

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task.
      // The caller will restart it and we can hopefully find more
      // things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT   " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT   " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT   " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved  = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
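  // The header printed below looks roughly like this (values invented
  // purely for illustration):
  //
  //   ### PHASE Post-Marking @ 10.543
  //   ### HEAP committed: 0x...-0x... reserved: 0x...-0x... region-size: 1048576
  //   ###
  //   ###  type address-range used prev-live next-live gc-eff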
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// Takes a pointer to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}
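
// A minimal usage sketch (illustrative; the exact call site is assumed,
// not shown here): the closure is driven over every region by the heap's
// region iterator, e.g.
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the header, doHeapRegion() prints one "###" line
// per region, and the destructor prints the summary footer when cl goes
// out of scope.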