/*
 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
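  // (Illustrative note, not part of the original logic: the call sites
  // in this file construct the bitmap with shifter == MinObjAlignment - 1,
  // so on a typical build _shifter is 0 and every heap word maps to its
  // own bit; with a larger _shifter each bit would cover 1 << _shifter
  // words and the rounding below skips addresses inside such a group.)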
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
  }
}

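// Lock-free push used concurrently by the marking workers. A sketch of
// the protocol (descriptive, not normative): read _index, try to CAS it
// one slot forward, and only the CAS winner stores its oop into the slot
// it claimed; a losing thread simply re-reads _index and retries.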
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
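  // Intended bracketing (an illustrative sketch, not a new invariant):
  //   note_start_of_gc();   // records _index in _saved_index
  //   ... GC work that must not grow or shrink the mark stack ...
  //   note_end_of_gc();     // verifies _index did not change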
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
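  // If the scan was aborted (e.g. because a Full GC cut it short) some
  // survivors may never have been claimed, so the assert below is only
  // meaningful on the non-aborted path.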
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set
      _parallel_marking_threads = (uint) ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                              (double) os::processor_count();
      double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (uint) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
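
      // Worked example with hypothetical settings: G1MarkingOverheadPercent
      // = 10, MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000 and an
      // 8-processor machine give overall_cm_overhead = 200 * 0.1 / 1000 =
      // 0.02 and cpu_ratio = 0.125, so marking_thread_num = ceil(0.16) = 1,
      // marking_task_overhead = 0.02 * 8 = 0.16, and sleep_factor =
      // (1 - 0.16) / 0.16 = 5.25.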
    } else {
      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();
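
  // (What the call above clears, for orientation: the per-worker counting
  // card bitmaps and marked-byte arrays allocated in the constructor; the
  // next marking cycle rebuilds them from scratch.)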

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the number of remaining forced overflows decreases
  // at every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // let the task associated with worker 0 do this
  if (worker_id == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

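        // Throttling sketch (illustrative numbers only): a marking step
        // that consumed t seconds of CPU time is followed by roughly
        // t * sleep_factor seconds of sleep, so with the hypothetical
        // sleep_factor of 5.25 from the constructor example, a 10 ms
        // step would be followed by ~52 ms of sleep.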
        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
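  // Root regions are survivor regions filled during the initial-mark
  // pause, after NTAMS was set to bottom(); everything in [bottom, top)
  // is therefore above NTAMS and implicitly live, and the scan here only
  // needs to mark the objects they reference (rationale sketch; the
  // assert below checks the NTAMS == bottom() precondition).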
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (parallel_marking_threads() > 0) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  // If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
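
      // (Illustrative card arithmetic, assuming 512-byte cards: an object
      // at byte offset 256 with a size of 800 bytes ends at offset 1056,
      // so end_idx initially names card 2; the increment above widens the
      // exclusive range to [0, 3), covering all three cards it touches.)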

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

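    // [bottom, ntams) was counted object-by-object during marking, while
    // [ntams, top) holds objects allocated since marking started, which
    // SATB treats as implicitly live; that window is therefore counted
    // wholesale below rather than walked.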

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     end_idx, _card_bm->size()));
      assert(start_idx < _card_bm->size(),
             err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     start_idx, _card_bm->size()));

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert(_g1h->workers()->active_workers() > 0,
             "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  uint _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  OldRegionSet* _old_proxy_set;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_proxy_set(old_proxy_set),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->continuesHumongous()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    _g1->reset_gc_time_stamps(hr);
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
    _g1->free_region_if_empty(hr,
                              &_freed_bytes,
                              _local_cleanup_list,
                              _old_proxy_set,
                              _humongous_proxy_set,
                              _hrrs_cleanup_task,
                              true /* par */);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) {
      _max_region_time = region_time;
    }
    return false;
  }

  size_t max_live_bytes() { return _max_live_bytes; }
  uint regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(uint worker_id) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
                                           &old_proxy_set,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
                                            _g1h->workers()->active_workers(),
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we were to iterate over the global cleanup list at the end
      // of cleanup to do this printing, we could not guarantee that we
      // would only generate output for the newly-reclaimed regions (the
      // list might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm) { }

  void work(uint worker_id) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }
};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  HRSPhaseSetter x(HRSPhaseCleanup);
  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  uint n_workers;

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    g1h->set_par_threads();
    n_workers = g1h->n_par_threads();
    assert(g1h->n_par_threads() == n_workers,
           "Should not have been reset");
    g1h->workers()->run_task(&g1_par_count_task);
    // Done with the parallel phase so reset to 0.
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    n_workers = 1;
    g1_par_count_task.work(0);
  }

  if (VerifyDuringGC) {
    // Verify that the counting data accumulated during marking matches
    // that calculated by walking the marking bitmap.
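    // The expected region and card bitmaps below start out empty; the
    // verification task fills them in from the marking bitmap and counts
    // every disagreement with the actual bitmaps as a failure.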

    // Bitmaps to hold expected values
    BitMap expected_region_bm(_region_bm.size(), false);
    BitMap expected_card_bm(_card_bm.size(), false);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
                                                 &_card_bm,
                                                 &expected_region_bm,
                                                 &expected_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_verify_task);
      // Done with the parallel phase so reset to 0.
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
             "sanity check");
    } else {
      g1_par_verify_task.work(0);
    }

    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  }

  size_t start_used_bytes = g1h->used();
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install the newly created mark bitmap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // If enabled, scrub the remembered sets before the
  // record_concurrent_mark_cleanup_end() call below, since scrubbing
  // affects the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  g1h->verify_region_sets_optional();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks.
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
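    // That is, a batch is appended once every G1SecondaryFreeListAppendLength
    // removals, and once more at the very end for whatever is left over.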
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Support closures for reference processing in G1

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

class G1CMKeepAliveClosure: public ExtendedOopClosure {
  G1CollectedHeap* _g1;
  ConcurrentMark* _cm;
public:
  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
    _g1(g1), _cm(cm) {
    assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    HeapWord* addr = (HeapWord*)obj;

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("\t[0] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             p, (void*) obj);
    }

    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
      _cm->mark_and_count(obj);
      _cm->mark_stack_push(obj);
    }
  }
};

class G1CMDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMMarkStack* _markStack;
  G1CMKeepAliveClosure* _oopClosure;
public:
  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
                               G1CMKeepAliveClosure* oopClosure) :
    _cm(cm),
    _markStack(markStack),
    _oopClosure(oopClosure) { }

  void do_void() {
    _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
  }
};

// 'Keep Alive' closure used by parallel reference processing.
// An instance of this closure is used in the parallel reference processing
// code rather than an instance of G1CMKeepAliveClosure. We could have used
// the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
// only placed on the discovered ref lists once, so we can mark and push with
// no need to check whether the object has already been marked. Using the
// G1CMKeepAliveClosure would mean, however, having all the worker threads
// operating on the global mark stack. This means that an individual
// worker would be doing lock-free pushes while it processes its own
// discovered ref list followed by a drain call. If the discovered ref lists
// are unbalanced then this could cause interference with the other
// workers. Using a CMTask (and its embedded local data structures)
// avoids that potential interference.
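// Note: each parallel reference processing worker obtains the CMTask
// that corresponds to its worker id (see G1CMRefProcTaskProxy::work()
// below), so the local queues this closure pushes to are per-worker.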
class G1CMParKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
public:
  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->worker_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call do_marking_step() to process
        // these entries. We call the routine in a loop, which we'll exit
        // if there's nothing more to do (i.e. we're done with the entries
        // that we've pushed as a result of the deal_with_reference calls
        // above) or we overflow.
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment
        // at the beginning of CMTask::do_marking_step() for those
        // conditions - one of which is reaching the specified time target.)
        // It is only when CMTask::do_marking_step() returns without setting
        // the has_aborted() flag that the marking has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_stealing    */,
                                 false /* do_termination */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
      }
    }
  }
};

class G1CMParDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
public:
  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task) { }

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step",
                               _task->worker_id());
      }

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks. The routine is called in a loop, which
      // we'll exit if there's nothing more to do (i.e. we've completely
      // drained the entries that were pushed as a result of applying the
      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered
      // ref lists above) or we overflow the global marking stack.
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment
      // at the beginning of CMTask::do_marking_step() for those
      // conditions - one of which is reaching the specified time target.)
      // It is only when CMTask::do_marking_step() returns without setting
      // the has_aborted() flag that the marking has completed.
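      // Note: the time target passed below is "something very large".
      // This drain runs inside the remark pause, so unlike the concurrent
      // phase there is no per-step time budget to honour.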
      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_stealing    */,
                             true /* do_termination */);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking.

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  WorkGang* _workers;
  int _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask& _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) { }

  virtual void work(uint worker_id) {
    CMTask* marking_task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the phase for each task execution so that
  // the termination protocol of CMTask::do_marking_step works.
  _cm->set_phase(_active_workers, false /* concurrent */);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    if (G1Log::finer()) {
      gclog_or_tty->put(' ');
    }
    TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Process weak references.
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    G1CMKeepAliveClosure g1_keep_alive(g1h, this);
    G1CMDrainMarkingStackClosure
      g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);

    // We use the work gang from the G1CollectedHeap and we utilize all
    // the worker threads.
    uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);

    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);

    if (rp->processing_is_mt()) {
      // Set the degree of MT here. If the discovery is done MT, there
      // may have been a different number of threads doing the discovery
      // and a different number of discovered lists may have Ref objects.
      // That is OK as long as the Reference lists are balanced (see
      // balance_all_queues() and balance_queues()).
      rp->set_active_mt_degree(active_workers);

      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        &par_task_executor);

      // The work routines of the parallel keep_alive and drain_marking_stack
      // closures will set the has_overflown flag if we overflow the global
      // marking stack.
    } else {
      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        NULL);
    }

    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");
    if (_markStack.overflow()) {
      // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    if (rp->processing_is_mt()) {
      assert(rp->num_q() == active_workers, "why not");
      rp->enqueue_discovered_references(&par_task_executor);
    } else {
      rp->enqueue_discovered_references();
    }

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  // Now clean up stale oops in the StringTable.
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in the symbol table.
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap = (CMBitMap*) temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing    */,
                              true /* do_termination */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart.
      // We instead want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers);
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    uint active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;
  bool _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption vo,
                           bool all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
      bool marked = _g1h->is_marked(obj, _vo);

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;
  bool _all;
  HeapRegion* _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption vo,
                              bool all,
                              HeapRegion* hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
    bool marked = _g1h->is_marked(o, _vo);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ? " M" : "");
" M" : ""); 2520 PrintReachableOopClosure oopCl(_out, _vo, _all); 2521 o->oop_iterate_no_header(&oopCl); 2522 } 2523 } 2524 }; 2525 2526 class PrintReachableRegionClosure : public HeapRegionClosure { 2527 private: 2528 G1CollectedHeap* _g1h; 2529 outputStream* _out; 2530 VerifyOption _vo; 2531 bool _all; 2532 2533 public: 2534 bool doHeapRegion(HeapRegion* hr) { 2535 HeapWord* b = hr->bottom(); 2536 HeapWord* e = hr->end(); 2537 HeapWord* t = hr->top(); 2538 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2539 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2540 "TAMS: "PTR_FORMAT, b, e, t, p); 2541 _out->cr(); 2542 2543 HeapWord* from = b; 2544 HeapWord* to = t; 2545 2546 if (to > from) { 2547 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2548 _out->cr(); 2549 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2550 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2551 _out->cr(); 2552 } 2553 2554 return false; 2555 } 2556 2557 PrintReachableRegionClosure(outputStream* out, 2558 VerifyOption vo, 2559 bool all) : 2560 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2561 }; 2562 2563 void ConcurrentMark::print_reachable(const char* str, 2564 VerifyOption vo, 2565 bool all) { 2566 gclog_or_tty->cr(); 2567 gclog_or_tty->print_cr("== Doing heap dump... "); 2568 2569 if (G1PrintReachableBaseFile == NULL) { 2570 gclog_or_tty->print_cr(" #### error: no base file defined"); 2571 return; 2572 } 2573 2574 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2575 (JVM_MAXPATHLEN - 1)) { 2576 gclog_or_tty->print_cr(" #### error: file name too long"); 2577 return; 2578 } 2579 2580 char file_name[JVM_MAXPATHLEN]; 2581 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2582 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2583 2584 fileStream fout(file_name); 2585 if (!fout.is_open()) { 2586 gclog_or_tty->print_cr(" #### error: could not open file"); 2587 return; 2588 } 2589 2590 outputStream* out = &fout; 2591 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2592 out->cr(); 2593 2594 out->print_cr("--- ITERATING OVER REGIONS"); 2595 out->cr(); 2596 PrintReachableRegionClosure rcl(out, vo, all); 2597 _g1h->heap_region_iterate(&rcl); 2598 out->cr(); 2599 2600 gclog_or_tty->print_cr(" done"); 2601 gclog_or_tty->flush(); 2602 } 2603 2604 #endif // PRODUCT 2605 2606 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2607 // Note we are overriding the read-only view of the prev map here, via 2608 // the cast. 2609 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2610 } 2611 2612 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2613 _nextMarkBitMap->clearRange(mr); 2614 } 2615 2616 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2617 clearRangePrevBitmap(mr); 2618 clearRangeNextBitmap(mr); 2619 } 2620 2621 HeapRegion* 2622 ConcurrentMark::claim_region(uint worker_id) { 2623 // "checkpoint" the finger 2624 HeapWord* finger = _finger; 2625 2626 // _heap_end will not change underneath our feet; it only changes at 2627 // yield points. 2628 while (finger < _heap_end) { 2629 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2630 2631 // Note on how this code handles humongous regions. In the 2632 // normal case the finger will reach the start of a "starts 2633 // humongous" (SH) region. 
    // Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord* bottom = curr_region->bottom();
    HeapWord* end = curr_region->end();
    HeapWord* limit = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             worker_id, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] we were successful with region = "
                               PTR_FORMAT, worker_id, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
                                 "returning it ", worker_id, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
                                 "returning NULL", worker_id, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               worker_id, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
enum VerifyNoCSetOopsPhase {
  VerifyNoCSetOopsStack,
  VerifyNoCSetOopsQueues,
  VerifyNoCSetOopsSATBCompleted,
  VerifyNoCSetOopsSATBThread
};

class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  VerifyNoCSetOopsPhase _phase;
  int _info;

  const char* phase_str() {
    switch (_phase) {
    case VerifyNoCSetOopsStack:         return "Stack";
    case VerifyNoCSetOopsQueues:        return "Queue";
    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
    default:                            ShouldNotReachHere();
    }
    return NULL;
  }

  void do_object_work(oop obj) {
    guarantee(!_g1h->obj_in_cs(obj),
              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
                      (void*) obj, phase_str(), _info));
  }

public:
  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }

  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
    _phase = phase;
    _info = info;
  }

  virtual void do_oop(oop* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    do_object_work(obj);
  }

  virtual void do_oop(narrowOop* p) {
    // We should not come across narrow oops while scanning marking
    // stacks and SATB buffers.
    ShouldNotReachHere();
  }

  virtual void do_object(oop obj) {
    do_object_work(obj);
  }
};

void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
                                         bool verify_enqueued_buffers,
                                         bool verify_thread_buffers,
                                         bool verify_fingers) {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->mark_in_progress()) {
    return;
  }

  VerifyNoCSetOopsClosure cl;

  if (verify_stacks) {
    // Verify entries on the global mark stack
    cl.set_phase(VerifyNoCSetOopsStack);
    _markStack.oops_do(&cl);

    // Verify entries on the task queues
    for (uint i = 0; i < _max_worker_id; i += 1) {
      cl.set_phase(VerifyNoCSetOopsQueues, i);
      OopTaskQueue* queue = _task_queues->queue(i);
      queue->oops_do(&cl);
    }
  }

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

  // Verify entries on the enqueued SATB buffers
  if (verify_enqueued_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
    satb_qs.iterate_completed_buffers_read_only(&cl);
  }

  // Verify entries on the per-thread SATB buffers
  if (verify_thread_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBThread);
    satb_qs.iterate_thread_buffers_read_only(&cl);
  }

  if (verify_fingers) {
    // Verify the global finger
    HeapWord* global_finger = finger();
    if (global_finger != NULL && global_finger < _heap_end) {
      // The global finger always points to a heap region boundary.
      // We use heap_region_containing_raw() to get the containing
      // region given that the global finger could be pointing to a
      // free region which subsequently becomes continues humongous.
      // If that happens, heap_region_containing() will return the
      // bottom of the corresponding starts humongous region and the
      // check below will not hold any more.
      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
      guarantee(global_finger == global_hr->bottom(),
                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
                        global_finger, HR_FORMAT_PARAMS(global_hr)));
    }

    // Verify the task fingers
    assert(parallel_marking_threads() <= _max_worker_id, "sanity");
    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
      CMTask* task = _tasks[i];
      HeapWord* task_finger = task->finger();
      if (task_finger != NULL && task_finger < _heap_end) {
        // See above note on the global finger verification.
        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
        guarantee(task_finger == task_hr->bottom() ||
                  !task_hr->in_collection_set(),
                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
                          task_finger, HR_FORMAT_PARAMS(task_hr)));
      }
    }
  }
}
#endif // PRODUCT

void ConcurrentMark::clear_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    OopTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

// Aggregate the counting data that was constructed concurrently
// with marking.
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have their bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, limit, hr->top(), hr->end()));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
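    // It should also be card aligned, since it is the bottom of a
    // heap region.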
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could actually be just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrs_index = hr->hrs_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrs_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
    hr->add_to_marked_bytes(marked_bytes);

    // Next heap region
    return false;
  }
};

class G1AggregateCountDataTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _cm_card_bm;
  uint _max_worker_id;
  int _active_workers;

public:
  G1AggregateCountDataTask(G1CollectedHeap* g1h,
                           ConcurrentMark* cm,
                           BitMap* cm_card_bm,
                           uint max_worker_id,
                           int n_workers) :
    AbstractGangTask("Count Aggregation"),
    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
    _max_worker_id(max_worker_id),
    _active_workers(n_workers) { }

  void work(uint worker_id) {
    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
                                            _active_workers,
                                            HeapRegion::AggregateCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&cl);
    }
  }
};

void ConcurrentMark::aggregate_count_data() {
  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                   _g1h->workers()->active_workers() :
                   1);

  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                           _max_worker_id, n_workers);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");
    _g1h->set_par_threads(n_workers);
    _g1h->workers()->run_task(&g1_par_agg_task);
    _g1h->set_par_threads(0);

    assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
           "sanity check");
    _g1h->reset_heap_region_claim_values();
  } else {
    g1_par_agg_task.work(0);
  }
}

// Clear the per-worker arrays used to store the per-region counting data
void ConcurrentMark::clear_all_count_data() {
  // Clear the global card bitmap - it will be filled during
  // liveness count aggregation (during remark) and the
  // final counting task.
  _card_bm.clear();

  // Clear the global region bitmap - it will be filled as part
  // of the final counting task.
  _region_bm.clear();

  uint max_regions = _g1h->max_regions();
  assert(_max_worker_id > 0, "uninitialized");

  for (uint i = 0; i < _max_worker_id; i += 1) {
    BitMap* task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_array_for(i);

    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
    assert(marked_bytes_array != NULL, "uninitialized");

    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
    task_card_bm->clear();
  }
}

void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Clear the liveness counting data
  clear_all_count_data();
  // Empty mark stack
  clear_marking_state();
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ?
                          _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ?
                            _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
                         "(%8.2f s marking).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

// We take a break if someone is trying to stop the world.
bool ConcurrentMark::do_yield_check(uint worker_id) {
  if (should_yield()) {
    if (worker_id == 0) {
      _g1h->g1_policy()->record_concurrent_pause();
    }
    cmThread()->yield();
    return true;
  } else {
    return false;
  }
}

bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}

bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}

bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return containing_card_is_marked(start) &&
         containing_card_is_marked(last);
}

#ifndef PRODUCT
// for debugging purposes
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (uint i = 0; i < _max_worker_id; ++i) {
    gclog_or_tty->print("   %u: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
#endif

void CMTask::scan_object(oop obj) {
  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");

  if (_cm->verbose_high()) {
    gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
                           _worker_id, (void*) obj);
  }

  size_t obj_size = obj->size();
  _words_scanned += obj_size;

  obj->oop_iterate(_cm_oop_closure);
  statsOnly( ++_objs_scanned );
  check_limits();
}

// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap* _nextMarkBitMap;
  ConcurrentMark* _cm;
  CMTask* _task;

public:
  CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");

    statsOnly( _task->increase_objs_found_on_bitmap() );
    assert(addr >= _task->finger(), "invariant");

    // We move that task's local finger along.
    _task->move_finger_to(addr);

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
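// Each object in a buffer is handed to the owning CMTask via
// deal_with_reference() and is treated like any other reference
// encountered during marking.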
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out continues humongous regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
                           _worker_id, hr);
  }

  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _worker_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and we do not in fact need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
                           _worker_id, _curr_region);
  }
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
3306 _curr_region = NULL; 3307 _finger = NULL; 3308 _region_limit = NULL; 3309 } 3310 3311 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3312 if (cm_oop_closure == NULL) { 3313 assert(_cm_oop_closure != NULL, "invariant"); 3314 } else { 3315 assert(_cm_oop_closure == NULL, "invariant"); 3316 } 3317 _cm_oop_closure = cm_oop_closure; 3318 } 3319 3320 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3321 guarantee(nextMarkBitMap != NULL, "invariant"); 3322 3323 if (_cm->verbose_low()) { 3324 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3325 } 3326 3327 _nextMarkBitMap = nextMarkBitMap; 3328 clear_region_fields(); 3329 3330 _calls = 0; 3331 _elapsed_time_ms = 0.0; 3332 _termination_time_ms = 0.0; 3333 _termination_start_time_ms = 0.0; 3334 3335 #if _MARKING_STATS_ 3336 _local_pushes = 0; 3337 _local_pops = 0; 3338 _local_max_size = 0; 3339 _objs_scanned = 0; 3340 _global_pushes = 0; 3341 _global_pops = 0; 3342 _global_max_size = 0; 3343 _global_transfers_to = 0; 3344 _global_transfers_from = 0; 3345 _regions_claimed = 0; 3346 _objs_found_on_bitmap = 0; 3347 _satb_buffers_processed = 0; 3348 _steal_attempts = 0; 3349 _steals = 0; 3350 _aborted = 0; 3351 _aborted_overflow = 0; 3352 _aborted_cm_aborted = 0; 3353 _aborted_yield = 0; 3354 _aborted_timed_out = 0; 3355 _aborted_satb = 0; 3356 _aborted_termination = 0; 3357 #endif // _MARKING_STATS_ 3358 } 3359 3360 bool CMTask::should_exit_termination() { 3361 regular_clock_call(); 3362 // This is called when we are in the termination protocol. We should 3363 // quit if, for some reason, this task wants to abort or the global 3364 // stack is not empty (this means that we can get work from it). 3365 return !_cm->mark_stack_empty() || has_aborted(); 3366 } 3367 3368 void CMTask::reached_limit() { 3369 assert(_words_scanned >= _words_scanned_limit || 3370 _refs_reached >= _refs_reached_limit , 3371 "shouldn't have been called otherwise"); 3372 regular_clock_call(); 3373 } 3374 3375 void CMTask::regular_clock_call() { 3376 if (has_aborted()) return; 3377 3378 // First, we need to recalculate the words scanned and refs reached 3379 // limits for the next clock call. 3380 recalculate_limits(); 3381 3382 // During the regular clock call we do the following 3383 3384 // (1) If an overflow has been flagged, then we abort. 3385 if (_cm->has_overflown()) { 3386 set_has_aborted(); 3387 return; 3388 } 3389 3390 // If we are not concurrent (i.e. we're doing remark) we don't need 3391 // to check anything else. The other steps are only needed during 3392 // the concurrent marking phase. 3393 if (!concurrent()) return; 3394 3395 // (2) If marking has been aborted for Full GC, then we also abort. 3396 if (_cm->has_aborted()) { 3397 set_has_aborted(); 3398 statsOnly( ++_aborted_cm_aborted ); 3399 return; 3400 } 3401 3402 double curr_time_ms = os::elapsedVTime() * 1000.0; 3403 3404 // (3) If marking stats are enabled, then we update the step history. 
3405 #if _MARKING_STATS_
3406   if (_words_scanned >= _words_scanned_limit) {
3407     ++_clock_due_to_scanning;
3408   }
3409   if (_refs_reached >= _refs_reached_limit) {
3410     ++_clock_due_to_marking;
3411   }
3412
3413   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3414   _interval_start_time_ms = curr_time_ms;
3415   _all_clock_intervals_ms.add(last_interval_ms);
3416
3417   if (_cm->verbose_medium()) {
3418     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3419                            "scanned = %d%s, refs reached = %d%s",
3420                            _worker_id, last_interval_ms,
3421                            _words_scanned,
3422                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3423                            _refs_reached,
3424                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3425   }
3426 #endif // _MARKING_STATS_
3427
3428   // (4) We check whether we should yield. If we have to, then we abort.
3429   if (_cm->should_yield()) {
3430     // We should yield. To do this we abort the task. The caller is
3431     // responsible for yielding.
3432     set_has_aborted();
3433     statsOnly( ++_aborted_yield );
3434     return;
3435   }
3436
3437   // (5) We check whether we've reached our time quota. If we have,
3438   // then we abort.
3439   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3440   if (elapsed_time_ms > _time_target_ms) {
3441     set_has_aborted();
3442     _has_timed_out = true;
3443     statsOnly( ++_aborted_timed_out );
3444     return;
3445   }
3446
3447   // (6) Finally, we check whether there are enough completed SATB
3448   // buffers available for processing. If there are, we abort.
3449   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3450   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3451     if (_cm->verbose_low()) {
3452       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3453                              _worker_id);
3454     }
3455     // we do need to process SATB buffers, so we'll abort and restart
3456     // the marking task to do so
3457     set_has_aborted();
3458     statsOnly( ++_aborted_satb );
3459     return;
3460   }
3461 }
3462
3463 void CMTask::recalculate_limits() {
3464   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3465   _words_scanned_limit = _real_words_scanned_limit;
3466
3467   _real_refs_reached_limit = _refs_reached + refs_reached_period;
3468   _refs_reached_limit = _real_refs_reached_limit;
3469 }
3470
3471 void CMTask::decrease_limits() {
3472   // This is called when we believe that we're going to do an infrequent
3473   // operation which will increase the per-byte scanned cost (i.e. move
3474   // entries to/from the global stack). It basically tries to decrease the
3475   // scanning limit so that the clock is called earlier.
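// For example (sketch, with illustrative numbers only): if
// words_scanned_period were 12*1024, the assignments below would pull
// the limit back by 3 * 12*1024 / 4 == 9*1024 words, so the clock
// fires up to three quarters of a period earlier than it otherwise
// would.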
3476
3477   if (_cm->verbose_medium()) {
3478     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3479   }
3480
3481   _words_scanned_limit = _real_words_scanned_limit -
3482     3 * words_scanned_period / 4;
3483   _refs_reached_limit = _real_refs_reached_limit -
3484     3 * refs_reached_period / 4;
3485 }
3486
3487 void CMTask::move_entries_to_global_stack() {
3488   // local array where we'll store the entries that will be popped
3489   // from the local queue
3490   oop buffer[global_stack_transfer_size];
3491
3492   int n = 0;
3493   oop obj;
3494   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3495     buffer[n] = obj;
3496     ++n;
3497   }
3498
3499   if (n > 0) {
3500     // we popped at least one entry from the local queue
3501
3502     statsOnly( ++_global_transfers_to; _local_pops += n );
3503
3504     if (!_cm->mark_stack_push(buffer, n)) {
3505       if (_cm->verbose_low()) {
3506         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3507                                _worker_id);
3508       }
3509       set_has_aborted();
3510     } else {
3511       // the transfer was successful
3512
3513       if (_cm->verbose_medium()) {
3514         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3515                                _worker_id, n);
3516       }
3517       statsOnly( int tmp_size = _cm->mark_stack_size();
3518                  if (tmp_size > _global_max_size) {
3519                    _global_max_size = tmp_size;
3520                  }
3521                  _global_pushes += n );
3522     }
3523   }
3524
3525   // this operation was quite expensive, so decrease the limits
3526   decrease_limits();
3527 }
3528
3529 void CMTask::get_entries_from_global_stack() {
3530   // local array where we'll store the entries that will be popped
3531   // from the global stack.
3532   oop buffer[global_stack_transfer_size];
3533   int n;
3534   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3535   assert(n <= global_stack_transfer_size,
3536          "we should not pop more than the given limit");
3537   if (n > 0) {
3538     // yes, we did actually pop at least one entry
3539
3540     statsOnly( ++_global_transfers_from; _global_pops += n );
3541     if (_cm->verbose_medium()) {
3542       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3543                              _worker_id, n);
3544     }
3545     for (int i = 0; i < n; ++i) {
3546       bool success = _task_queue->push(buffer[i]);
3547       // We only call this when the local queue is empty or under a
3548       // given target limit. So, we do not expect this push to fail.
3549       assert(success, "invariant");
3550     }
3551
3552     statsOnly( int tmp_size = _task_queue->size();
3553                if (tmp_size > _local_max_size) {
3554                  _local_max_size = tmp_size;
3555                }
3556                _local_pushes += n );
3557   }
3558
3559   // this operation was quite expensive, so decrease the limits
3560   decrease_limits();
3561 }
3562
3563 void CMTask::drain_local_queue(bool partially) {
3564   if (has_aborted()) return;
3565
3566   // Decide what the target size is, depending on whether we're going to
3567   // drain it partially (so that other tasks can steal if they run out
3568   // of things to do) or totally (at the very end).
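// For instance (sketch): with an N-entry task queue, the partial
// target below is MIN2(N/3, GCDrainStackTargetSize) -- small enough
// that other tasks still find entries to steal, but large enough
// that we don't come back here after every few pops.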
3569   size_t target_size;
3570   if (partially) {
3571     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3572   } else {
3573     target_size = 0;
3574   }
3575
3576   if (_task_queue->size() > target_size) {
3577     if (_cm->verbose_high()) {
3578       gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3579                              _worker_id, target_size);
3580     }
3581
3582     oop obj;
3583     bool ret = _task_queue->pop_local(obj);
3584     while (ret) {
3585       statsOnly( ++_local_pops );
3586
3587       if (_cm->verbose_high()) {
3588         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3589                                (void*) obj);
3590       }
3591
3592       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3593       assert(!_g1h->is_on_master_free_list(
3594                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3595
3596       scan_object(obj);
3597
3598       if (_task_queue->size() <= target_size || has_aborted()) {
3599         ret = false;
3600       } else {
3601         ret = _task_queue->pop_local(obj);
3602       }
3603     }
3604
3605     if (_cm->verbose_high()) {
3606       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3607                              _worker_id, _task_queue->size());
3608     }
3609   }
3610 }
3611
3612 void CMTask::drain_global_stack(bool partially) {
3613   if (has_aborted()) return;
3614
3615   // We have a policy to drain the local queue before we attempt to
3616   // drain the global stack.
3617   assert(partially || _task_queue->size() == 0, "invariant");
3618
3619   // Decide what the target size is, depending on whether we're going to
3620   // drain it partially (so that other tasks can steal if they run out
3621   // of things to do) or totally (at the very end). Notice that,
3622   // because we move entries from the global stack in chunks or
3623   // because another task might be doing the same, we might in fact
3624   // drop below the target. But this is not a problem.
3625   size_t target_size;
3626   if (partially) {
3627     target_size = _cm->partial_mark_stack_size_target();
3628   } else {
3629     target_size = 0;
3630   }
3631
3632   if (_cm->mark_stack_size() > target_size) {
3633     if (_cm->verbose_low()) {
3634       gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3635                              _worker_id, target_size);
3636     }
3637
3638     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3639       get_entries_from_global_stack();
3640       drain_local_queue(partially);
3641     }
3642
3643     if (_cm->verbose_low()) {
3644       gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3645                              _worker_id, _cm->mark_stack_size());
3646     }
3647   }
3648 }
3649
3650 // The SATB queue code makes several assumptions about whether to call
3651 // the par or non-par versions of the methods. This is why some of the
3652 // code is replicated. We should really get rid of the single-threaded
3653 // version of the code to simplify things.
3654 void CMTask::drain_satb_buffers() {
3655   if (has_aborted()) return;
3656
3657   // We set this so that the regular clock knows that we're in the
3658   // middle of draining buffers and doesn't set the abort flag when it
3659   // notices that SATB buffers are available for draining. It'd be
3660   // very counterproductive if it did that. :-)
3661   _draining_satb_buffers = true;
3662
3663   CMObjectClosure oc(this);
3664   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3665   if (G1CollectedHeap::use_parallel_gc_threads()) {
3666     satb_mq_set.set_par_closure(_worker_id, &oc);
3667   } else {
3668     satb_mq_set.set_closure(&oc);
3669   }
3670
3671   // This keeps claiming and applying the closure to completed buffers
3672   // until we run out of buffers or we need to abort.
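// Both branches below have the same shape (sketch):
//
//   while (!has_aborted() && <claim-and-apply one completed buffer>) {
//     statsOnly( ++_satb_buffers_processed );
//     regular_clock_call();   // stay responsive between buffers
//   }
//
// they differ only in whether the par or non-par queue set method
// does the claiming.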
3673 if (G1CollectedHeap::use_parallel_gc_threads()) { 3674 while (!has_aborted() && 3675 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3676 if (_cm->verbose_medium()) { 3677 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3678 } 3679 statsOnly( ++_satb_buffers_processed ); 3680 regular_clock_call(); 3681 } 3682 } else { 3683 while (!has_aborted() && 3684 satb_mq_set.apply_closure_to_completed_buffer()) { 3685 if (_cm->verbose_medium()) { 3686 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3687 } 3688 statsOnly( ++_satb_buffers_processed ); 3689 regular_clock_call(); 3690 } 3691 } 3692 3693 if (!concurrent() && !has_aborted()) { 3694 // We should only do this during remark. 3695 if (G1CollectedHeap::use_parallel_gc_threads()) { 3696 satb_mq_set.par_iterate_closure_all_threads(_worker_id); 3697 } else { 3698 satb_mq_set.iterate_closure_all_threads(); 3699 } 3700 } 3701 3702 _draining_satb_buffers = false; 3703 3704 assert(has_aborted() || 3705 concurrent() || 3706 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3707 3708 if (G1CollectedHeap::use_parallel_gc_threads()) { 3709 satb_mq_set.set_par_closure(_worker_id, NULL); 3710 } else { 3711 satb_mq_set.set_closure(NULL); 3712 } 3713 3714 // again, this was a potentially expensive operation, decrease the 3715 // limits to get the regular clock call early 3716 decrease_limits(); 3717 } 3718 3719 void CMTask::print_stats() { 3720 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3721 _worker_id, _calls); 3722 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3723 _elapsed_time_ms, _termination_time_ms); 3724 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3725 _step_times_ms.num(), _step_times_ms.avg(), 3726 _step_times_ms.sd()); 3727 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3728 _step_times_ms.maximum(), _step_times_ms.sum()); 3729 3730 #if _MARKING_STATS_ 3731 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3732 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3733 _all_clock_intervals_ms.sd()); 3734 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3735 _all_clock_intervals_ms.maximum(), 3736 _all_clock_intervals_ms.sum()); 3737 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3738 _clock_due_to_scanning, _clock_due_to_marking); 3739 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3740 _objs_scanned, _objs_found_on_bitmap); 3741 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3742 _local_pushes, _local_pops, _local_max_size); 3743 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3744 _global_pushes, _global_pops, _global_max_size); 3745 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3746 _global_transfers_to,_global_transfers_from); 3747 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3748 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3749 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3750 _steal_attempts, _steals); 3751 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3752 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3753 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3754 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3755 _aborted_timed_out, 
_aborted_satb, _aborted_termination);
3756 #endif // _MARKING_STATS_
3757 }
3758
3759 /*****************************************************************************
3760
3761   The do_marking_step(time_target_ms) method is the building block
3762   of the parallel marking framework. It can be called in parallel
3763   with other invocations of do_marking_step() on different tasks
3764   (but only one per task, obviously) and concurrently with the
3765   mutator threads, or during remark, hence it eliminates the need
3766   for two versions of the code. When called during remark, it will
3767   pick up from where the task left off during the concurrent marking
3768   phase. Interestingly, tasks are also claimable during evacuation
3769   pauses, since do_marking_step() ensures that it aborts before
3770   it needs to yield.
3771
3772   The data structures that it uses to do marking work are the
3773   following:
3774
3775     (1) Marking Bitmap. If there are gray objects that appear only
3776     on the bitmap (this happens either when dealing with an overflow
3777     or when the initial marking phase has simply marked the roots
3778     and didn't push them on the stack), then tasks claim heap
3779     regions whose bitmap they then scan to find gray objects. A
3780     global finger indicates where the end of the last claimed region
3781     is. A local finger indicates how far into the region a task has
3782     scanned. The two fingers are used to determine how to gray an
3783     object (i.e. whether simply marking it is OK, as it will be
3784     visited by a task in the future, or whether it also needs to
3785     be pushed on a stack).
3786
3787     (2) Local Queue. Each task has a local queue, which it can
3788     access reasonably efficiently. Other tasks can steal from it
3789     when they run out of work. Throughout the marking phase, a
3790     task attempts to keep its local queue short but not totally
3791     empty, so that entries are available for stealing by other
3792     tasks. Only when there is no more work does a task totally
3793     drain its local queue.
3794
3795     (3) Global Mark Stack. This handles local queue overflow. During
3796     marking, entries are only moved between it and the local queues
3797     in sets, as access to it requires a mutex and finer-grained
3798     interaction with it might cause contention. If it overflows,
3799     then the marking phase should restart and iterate over the
3800     bitmap to identify gray objects. Throughout the marking phase,
3801     tasks attempt to keep the global mark stack short but not
3802     totally empty, so that entries are available for popping by
3803     other tasks. Only when there is no more work do tasks totally
3804     drain the global mark stack.
3805
3806     (4) SATB Buffer Queue. This is where completed SATB buffers are
3807     made available. Buffers are regularly removed from this queue
3808     and scanned for roots, so that the queue doesn't get too
3809     long. During remark, all completed buffers are processed, as
3810     well as the filled-in parts of any uncompleted buffers.
3811
3812   The do_marking_step() method tries to abort when the time target
3813   has been reached. There are a few other cases in which the
3814   do_marking_step() method also aborts:
3815
3816     (1) When the marking phase has been aborted (after a Full GC).
3817
3818     (2) When a global overflow (on the global stack) has been
3819     triggered. Before the task aborts, it will actually sync up with
3820     the other tasks to ensure that all the marking data structures
3821     (local queues, stacks, fingers etc.)
are re-initialised so that
3822   when do_marking_step() completes, the marking phase can
3823   immediately restart.
3824
3825     (3) When enough completed SATB buffers are available. The
3826     do_marking_step() method only tries to drain SATB buffers right
3827     at the beginning. So, if enough buffers are available, the
3828     marking step aborts and the SATB buffers are processed at
3829     the beginning of the next invocation.
3830
3831     (4) To yield. When we have to yield, we abort and yield
3832     right at the end of do_marking_step(). This saves us from a lot
3833     of hassle as, by yielding, we might allow a Full GC. If this
3834     happens then objects will be compacted underneath our feet, the
3835     heap might shrink, etc. We save checking for this by just
3836     aborting and doing the yield right at the end.
3837
3838   From the above it follows that the do_marking_step() method should
3839   be called in a loop (or, otherwise, regularly) until it completes.
3840
3841   If a marking step completes without its has_aborted() flag being
3842   true, it means it has completed the current marking phase (and
3843   also all other marking tasks have done so and have all synced up).
3844
3845   A method called regular_clock_call() is invoked "regularly" (in
3846   sub-millisecond intervals) throughout marking. It is this clock
3847   method that checks all the abort conditions which were mentioned
3848   above and decides when the task should abort. A work-based scheme
3849   is used to trigger this clock method: when the number of object
3850   words the marking phase has scanned or the number of references
3851   the marking phase has visited reaches a given limit. Additional
3852   invocations of the clock method have been planted in a few other
3853   strategic places too. The initial reason for the clock method was
3854   to avoid calling vtime too regularly, as it is quite expensive. So,
3855   once it was in place, it was natural to piggy-back all the other
3856   conditions on it too and not constantly check them throughout the code.
3857
3858  *****************************************************************************/
3859
3860 void CMTask::do_marking_step(double time_target_ms,
3861                              bool do_stealing,
3862                              bool do_termination) {
3863   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3864   assert(concurrent() == _cm->concurrent(), "they should be the same");
3865
3866   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3867   assert(_task_queues != NULL, "invariant");
3868   assert(_task_queue != NULL, "invariant");
3869   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3870
3871   assert(!_claimed,
3872          "only one thread should claim this task at any one time");
3873
3874   // OK, this doesn't safeguard against all possible scenarios, as it is
3875   // possible for two threads to set the _claimed flag at the same
3876   // time. But it is only for debugging purposes anyway and it will
3877   // catch most problems.
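// (A truly race-free claim would need an atomic operation, e.g. --
// sketch only, assuming a hypothetical volatile jint _claimed_flag
// in place of the bool used here:
//
//   if (Atomic::cmpxchg(1, &_claimed_flag, 0) != 0) {
//     guarantee(false, "task claimed by two threads");
//   }
//
// but for a debugging aid the plain store below is good enough.)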
3878 _claimed = true; 3879 3880 _start_time_ms = os::elapsedVTime() * 1000.0; 3881 statsOnly( _interval_start_time_ms = _start_time_ms ); 3882 3883 double diff_prediction_ms = 3884 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 3885 _time_target_ms = time_target_ms - diff_prediction_ms; 3886 3887 // set up the variables that are used in the work-based scheme to 3888 // call the regular clock method 3889 _words_scanned = 0; 3890 _refs_reached = 0; 3891 recalculate_limits(); 3892 3893 // clear all flags 3894 clear_has_aborted(); 3895 _has_timed_out = false; 3896 _draining_satb_buffers = false; 3897 3898 ++_calls; 3899 3900 if (_cm->verbose_low()) { 3901 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, " 3902 "target = %1.2lfms >>>>>>>>>>", 3903 _worker_id, _calls, _time_target_ms); 3904 } 3905 3906 // Set up the bitmap and oop closures. Anything that uses them is 3907 // eventually called from this method, so it is OK to allocate these 3908 // statically. 3909 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 3910 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 3911 set_cm_oop_closure(&cm_oop_closure); 3912 3913 if (_cm->has_overflown()) { 3914 // This can happen if the mark stack overflows during a GC pause 3915 // and this task, after a yield point, restarts. We have to abort 3916 // as we need to get into the overflow protocol which happens 3917 // right at the end of this task. 3918 set_has_aborted(); 3919 } 3920 3921 // First drain any available SATB buffers. After this, we will not 3922 // look at SATB buffers before the next invocation of this method. 3923 // If enough completed SATB buffers are queued up, the regular clock 3924 // will abort this task so that it restarts. 3925 drain_satb_buffers(); 3926 // ...then partially drain the local queue and the global stack 3927 drain_local_queue(true); 3928 drain_global_stack(true); 3929 3930 do { 3931 if (!has_aborted() && _curr_region != NULL) { 3932 // This means that we're already holding on to a region. 3933 assert(_finger != NULL, "if region is not NULL, then the finger " 3934 "should not be NULL either"); 3935 3936 // We might have restarted this task after an evacuation pause 3937 // which might have evacuated the region we're holding on to 3938 // underneath our feet. Let's read its limit again to make sure 3939 // that we do not iterate over a region of the heap that 3940 // contains garbage (update_region_limit() will also move 3941 // _finger to the start of the region if it is found empty). 3942 update_region_limit(); 3943 // We will start from _finger not from the start of the region, 3944 // as we might be restarting this task after aborting half-way 3945 // through scanning this region. In this case, _finger points to 3946 // the address where we last found a marked object. If this is a 3947 // fresh region, _finger points to start(). 3948 MemRegion mr = MemRegion(_finger, _region_limit); 3949 3950 if (_cm->verbose_low()) { 3951 gclog_or_tty->print_cr("[%u] we're scanning part " 3952 "["PTR_FORMAT", "PTR_FORMAT") " 3953 "of region "PTR_FORMAT, 3954 _worker_id, _finger, _region_limit, _curr_region); 3955 } 3956 3957 // Let's iterate over the bitmap of the part of the 3958 // region that is left. 3959 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3960 // We successfully completed iterating over the region. Now, 3961 // let's give up the region. 
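// (iterate() returning true means the closure never returned false,
// i.e. we were never asked to abort while walking this part of the
// bitmap.)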
3962 giveup_current_region(); 3963 regular_clock_call(); 3964 } else { 3965 assert(has_aborted(), "currently the only way to do so"); 3966 // The only way to abort the bitmap iteration is to return 3967 // false from the do_bit() method. However, inside the 3968 // do_bit() method we move the _finger to point to the 3969 // object currently being looked at. So, if we bail out, we 3970 // have definitely set _finger to something non-null. 3971 assert(_finger != NULL, "invariant"); 3972 3973 // Region iteration was actually aborted. So now _finger 3974 // points to the address of the object we last scanned. If we 3975 // leave it there, when we restart this task, we will rescan 3976 // the object. It is easy to avoid this. We move the finger by 3977 // enough to point to the next possible object header (the 3978 // bitmap knows by how much we need to move it as it knows its 3979 // granularity). 3980 assert(_finger < _region_limit, "invariant"); 3981 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 3982 // Check if bitmap iteration was aborted while scanning the last object 3983 if (new_finger >= _region_limit) { 3984 giveup_current_region(); 3985 } else { 3986 move_finger_to(new_finger); 3987 } 3988 } 3989 } 3990 // At this point we have either completed iterating over the 3991 // region we were holding on to, or we have aborted. 3992 3993 // We then partially drain the local queue and the global stack. 3994 // (Do we really need this?) 3995 drain_local_queue(true); 3996 drain_global_stack(true); 3997 3998 // Read the note on the claim_region() method on why it might 3999 // return NULL with potentially more regions available for 4000 // claiming and why we have to check out_of_regions() to determine 4001 // whether we're done or not. 4002 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4003 // We are going to try to claim a new region. We should have 4004 // given up on the previous one. 4005 // Separated the asserts so that we know which one fires. 4006 assert(_curr_region == NULL, "invariant"); 4007 assert(_finger == NULL, "invariant"); 4008 assert(_region_limit == NULL, "invariant"); 4009 if (_cm->verbose_low()) { 4010 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4011 } 4012 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4013 if (claimed_region != NULL) { 4014 // Yes, we managed to claim one 4015 statsOnly( ++_regions_claimed ); 4016 4017 if (_cm->verbose_low()) { 4018 gclog_or_tty->print_cr("[%u] we successfully claimed " 4019 "region "PTR_FORMAT, 4020 _worker_id, claimed_region); 4021 } 4022 4023 setup_for_region(claimed_region); 4024 assert(_curr_region == claimed_region, "invariant"); 4025 } 4026 // It is important to call the regular clock here. It might take 4027 // a while to claim a region if, for example, we hit a large 4028 // block of empty regions. So we need to call the regular clock 4029 // method once round the loop to make sure it's called 4030 // frequently enough. 4031 regular_clock_call(); 4032 } 4033 4034 if (!has_aborted() && _curr_region == NULL) { 4035 assert(_cm->out_of_regions(), 4036 "at this point we should be out of regions"); 4037 } 4038 } while ( _curr_region != NULL && !has_aborted()); 4039 4040 if (!has_aborted()) { 4041 // We cannot check whether the global stack is empty, since other 4042 // tasks might be pushing objects to it concurrently. 
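// out_of_regions(), however, is safe to assert: the global finger
// only ever moves forward during a marking round, so once we have
// run out of regions we stay out of regions.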
4043 assert(_cm->out_of_regions(), 4044 "at this point we should be out of regions"); 4045 4046 if (_cm->verbose_low()) { 4047 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4048 } 4049 4050 // Try to reduce the number of available SATB buffers so that 4051 // remark has less work to do. 4052 drain_satb_buffers(); 4053 } 4054 4055 // Since we've done everything else, we can now totally drain the 4056 // local queue and global stack. 4057 drain_local_queue(false); 4058 drain_global_stack(false); 4059 4060 // Attempt at work stealing from other task's queues. 4061 if (do_stealing && !has_aborted()) { 4062 // We have not aborted. This means that we have finished all that 4063 // we could. Let's try to do some stealing... 4064 4065 // We cannot check whether the global stack is empty, since other 4066 // tasks might be pushing objects to it concurrently. 4067 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4068 "only way to reach here"); 4069 4070 if (_cm->verbose_low()) { 4071 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4072 } 4073 4074 while (!has_aborted()) { 4075 oop obj; 4076 statsOnly( ++_steal_attempts ); 4077 4078 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4079 if (_cm->verbose_medium()) { 4080 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4081 _worker_id, (void*) obj); 4082 } 4083 4084 statsOnly( ++_steals ); 4085 4086 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4087 "any stolen object should be marked"); 4088 scan_object(obj); 4089 4090 // And since we're towards the end, let's totally drain the 4091 // local queue and global stack. 4092 drain_local_queue(false); 4093 drain_global_stack(false); 4094 } else { 4095 break; 4096 } 4097 } 4098 } 4099 4100 // If we are about to wrap up and go into termination, check if we 4101 // should raise the overflow flag. 4102 if (do_termination && !has_aborted()) { 4103 if (_cm->force_overflow()->should_force()) { 4104 _cm->set_has_overflown(); 4105 regular_clock_call(); 4106 } 4107 } 4108 4109 // We still haven't aborted. Now, let's try to get into the 4110 // termination protocol. 4111 if (do_termination && !has_aborted()) { 4112 // We cannot check whether the global stack is empty, since other 4113 // tasks might be concurrently pushing objects on it. 4114 // Separated the asserts so that we know which one fires. 4115 assert(_cm->out_of_regions(), "only way to reach here"); 4116 assert(_task_queue->size() == 0, "only way to reach here"); 4117 4118 if (_cm->verbose_low()) { 4119 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4120 } 4121 4122 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4123 // The CMTask class also extends the TerminatorTerminator class, 4124 // hence its should_exit_termination() method will also decide 4125 // whether to exit the termination protocol or not. 4126 bool finished = _cm->terminator()->offer_termination(this); 4127 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4128 _termination_time_ms += 4129 termination_end_time_ms - _termination_start_time_ms; 4130 4131 if (finished) { 4132 // We're all done. 4133 4134 if (_worker_id == 0) { 4135 // let's allow task 0 to do this 4136 if (concurrent()) { 4137 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4138 // we need to set this to false before the next 4139 // safepoint. This way we ensure that the marking phase 4140 // doesn't observe any more heap expansions. 
4141           _cm->clear_concurrent_marking_in_progress();
4142         }
4143       }
4144
4145       // We can now guarantee that the global stack is empty, since
4146       // all other tasks have finished. We separated the guarantees so
4147       // that, if a condition is false, we can immediately find out
4148       // which one.
4149       guarantee(_cm->out_of_regions(), "only way to reach here");
4150       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4151       guarantee(_task_queue->size() == 0, "only way to reach here");
4152       guarantee(!_cm->has_overflown(), "only way to reach here");
4153       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4154
4155       if (_cm->verbose_low()) {
4156         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4157       }
4158     } else {
4159       // Apparently there's more work to do. Let's abort this task. The
4160       // caller will restart it and we can hopefully find more things to do.
4161
4162       if (_cm->verbose_low()) {
4163         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4164                                _worker_id);
4165       }
4166
4167       set_has_aborted();
4168       statsOnly( ++_aborted_termination );
4169     }
4170   }
4171
4172   // Mainly for debugging purposes to make sure that a pointer to the
4173   // closure which was statically allocated in this frame doesn't
4174   // escape it by accident.
4175   set_cm_oop_closure(NULL);
4176   double end_time_ms = os::elapsedVTime() * 1000.0;
4177   double elapsed_time_ms = end_time_ms - _start_time_ms;
4178   // Update the step history.
4179   _step_times_ms.add(elapsed_time_ms);
4180
4181   if (has_aborted()) {
4182     // The task was aborted for some reason.
4183
4184     statsOnly( ++_aborted );
4185
4186     if (_has_timed_out) {
4187       double diff_ms = elapsed_time_ms - _time_target_ms;
4188       // Keep statistics of how well we did with respect to hitting
4189       // our target only if we actually timed out (if we aborted for
4190       // other reasons, then the results might get skewed).
4191       _marking_step_diffs_ms.add(diff_ms);
4192     }
4193
4194     if (_cm->has_overflown()) {
4195       // This is the interesting one. We aborted because a global
4196       // overflow was raised. This means we have to restart the
4197       // marking phase and start iterating over regions. However, in
4198       // order to do this we have to make sure that all tasks stop
4199       // what they are doing and re-initialise in a safe manner. We
4200       // will achieve this with the use of two barrier sync points.
4201
4202       if (_cm->verbose_low()) {
4203         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4204       }
4205
4206       _cm->enter_first_sync_barrier(_worker_id);
4207       // When we exit this sync barrier we know that all tasks have
4208       // stopped doing marking work. So, it's now safe to
4209       // re-initialise our data structures. At the end of this method,
4210       // task 0 will clear the global data structures.
4211
4212       statsOnly( ++_aborted_overflow );
4213
4214       // We clear the local state of this task...
4215       clear_region_fields();
4216
4217       // ...and enter the second barrier.
4218       _cm->enter_second_sync_barrier(_worker_id);
4219       // At this point everything has been re-initialised and we're
4220       // ready to restart.
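// The whole hand-shake therefore looks like this (sketch):
//
//   _cm->enter_first_sync_barrier(_worker_id);   // all tasks stop marking
//   clear_region_fields();                       // reset task-local state
//   _cm->enter_second_sync_barrier(_worker_id);  // wait for everyone's reset
//   // do_marking_step() then returns with has_aborted() set and the
//   // caller invokes it again for the restarted marking phase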
4221 } 4222 4223 if (_cm->verbose_low()) { 4224 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4225 "elapsed = %1.2lfms <<<<<<<<<<", 4226 _worker_id, _time_target_ms, elapsed_time_ms); 4227 if (_cm->has_aborted()) { 4228 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4229 _worker_id); 4230 } 4231 } 4232 } else { 4233 if (_cm->verbose_low()) { 4234 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4235 "elapsed = %1.2lfms <<<<<<<<<<", 4236 _worker_id, _time_target_ms, elapsed_time_ms); 4237 } 4238 } 4239 4240 _claimed = false; 4241 } 4242 4243 CMTask::CMTask(uint worker_id, 4244 ConcurrentMark* cm, 4245 size_t* marked_bytes, 4246 BitMap* card_bm, 4247 CMTaskQueue* task_queue, 4248 CMTaskQueueSet* task_queues) 4249 : _g1h(G1CollectedHeap::heap()), 4250 _worker_id(worker_id), _cm(cm), 4251 _claimed(false), 4252 _nextMarkBitMap(NULL), _hash_seed(17), 4253 _task_queue(task_queue), 4254 _task_queues(task_queues), 4255 _cm_oop_closure(NULL), 4256 _marked_bytes_array(marked_bytes), 4257 _card_bm(card_bm) { 4258 guarantee(task_queue != NULL, "invariant"); 4259 guarantee(task_queues != NULL, "invariant"); 4260 4261 statsOnly( _clock_due_to_scanning = 0; 4262 _clock_due_to_marking = 0 ); 4263 4264 _marking_step_diffs_ms.add(0.5); 4265 } 4266 4267 // These are formatting macros that are used below to ensure 4268 // consistent formatting. The *_H_* versions are used to format the 4269 // header for a particular value and they should be kept consistent 4270 // with the corresponding macro. Also note that most of the macros add 4271 // the necessary white space (as a prefix) which makes them a bit 4272 // easier to compose. 4273 4274 // All the output lines are prefixed with this string to be able to 4275 // identify them easily in a large log file. 4276 #define G1PPRL_LINE_PREFIX "###" 4277 4278 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4279 #ifdef _LP64 4280 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4281 #else // _LP64 4282 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4283 #endif // _LP64 4284 4285 // For per-region info 4286 #define G1PPRL_TYPE_FORMAT " %-4s" 4287 #define G1PPRL_TYPE_H_FORMAT " %4s" 4288 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4289 #define G1PPRL_BYTE_H_FORMAT " %9s" 4290 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4291 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4292 4293 // For summary info 4294 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4295 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4296 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4297 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4298 4299 G1PrintRegionLivenessInfoClosure:: 4300 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4301 : _out(out), 4302 _total_used_bytes(0), _total_capacity_bytes(0), 4303 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4304 _hum_used_bytes(0), _hum_capacity_bytes(0), 4305 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) { 4306 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4307 MemRegion g1_committed = g1h->g1_committed(); 4308 MemRegion g1_reserved = g1h->g1_reserved(); 4309 double now = os::elapsedTime(); 4310 4311 // Print the header of the output. 
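// With the formats above, the header comes out roughly like this
// (sketch; addresses and sizes are made up):
//
//   ### PHASE Post-Marking @ 123.456
//   ### HEAP  committed: 0x...-0x...  reserved: 0x...-0x...  region-size: 1048576
//   ###
//   ###   type         address-range       used  prev-live  next-live  gc-eff
//   ###                                  (bytes)    (bytes)    (bytes)  (bytes/ms)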
4312   _out->cr();
4313   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4314   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4315                  G1PPRL_SUM_ADDR_FORMAT("committed")
4316                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4317                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4318                  g1_committed.start(), g1_committed.end(),
4319                  g1_reserved.start(), g1_reserved.end(),
4320                  HeapRegion::GrainBytes);
4321   _out->print_cr(G1PPRL_LINE_PREFIX);
4322   _out->print_cr(G1PPRL_LINE_PREFIX
4323                  G1PPRL_TYPE_H_FORMAT
4324                  G1PPRL_ADDR_BASE_H_FORMAT
4325                  G1PPRL_BYTE_H_FORMAT
4326                  G1PPRL_BYTE_H_FORMAT
4327                  G1PPRL_BYTE_H_FORMAT
4328                  G1PPRL_DOUBLE_H_FORMAT,
4329                  "type", "address-range",
4330                  "used", "prev-live", "next-live", "gc-eff");
4331   _out->print_cr(G1PPRL_LINE_PREFIX
4332                  G1PPRL_TYPE_H_FORMAT
4333                  G1PPRL_ADDR_BASE_H_FORMAT
4334                  G1PPRL_BYTE_H_FORMAT
4335                  G1PPRL_BYTE_H_FORMAT
4336                  G1PPRL_BYTE_H_FORMAT
4337                  G1PPRL_DOUBLE_H_FORMAT,
4338                  "", "",
4339                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4340 }
4341
4342 // Given a reference to one of the _hum_* fields, this deduces the
4343 // corresponding value for a region in a humongous region series
4344 // (either the region size, or what's left if the _hum_* field
4345 // is < the region size), and updates the _hum_* field accordingly.
4346 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4347   size_t bytes = 0;
4348   // The > 0 check is to deal with the prev and next live bytes which
4349   // could be 0.
4350   if (*hum_bytes > 0) {
4351     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4352     *hum_bytes -= bytes;
4353   }
4354   return bytes;
4355 }
4356
4357 // It deduces the values for a region in a humongous region series
4358 // from the _hum_* fields and updates those accordingly. It assumes
4359 // that the _hum_* fields have already been set up from the "starts
4360 // humongous" region and that we visit the regions in address order.
4361 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4362                                                      size_t* capacity_bytes,
4363                                                      size_t* prev_live_bytes,
4364                                                      size_t* next_live_bytes) {
4365   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4366   *used_bytes = get_hum_bytes(&_hum_used_bytes);
4367   *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4368   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4369   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4370 }
4371
4372 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4373   const char* type = "";
4374   HeapWord* bottom = r->bottom();
4375   HeapWord* end = r->end();
4376   size_t capacity_bytes = r->capacity();
4377   size_t used_bytes = r->used();
4378   size_t prev_live_bytes = r->live_bytes();
4379   size_t next_live_bytes = r->next_live_bytes();
4380   double gc_eff = r->gc_efficiency();
4381   if (r->used() == 0) {
4382     type = "FREE";
4383   } else if (r->is_survivor()) {
4384     type = "SURV";
4385   } else if (r->is_young()) {
4386     type = "EDEN";
4387   } else if (r->startsHumongous()) {
4388     type = "HUMS";
4389
4390     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4391            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4392            "they should have been zeroed after the last time we used them");
4393     // Set up the _hum_* fields.
4394 _hum_capacity_bytes = capacity_bytes; 4395 _hum_used_bytes = used_bytes; 4396 _hum_prev_live_bytes = prev_live_bytes; 4397 _hum_next_live_bytes = next_live_bytes; 4398 get_hum_bytes(&used_bytes, &capacity_bytes, 4399 &prev_live_bytes, &next_live_bytes); 4400 end = bottom + HeapRegion::GrainWords; 4401 } else if (r->continuesHumongous()) { 4402 type = "HUMC"; 4403 get_hum_bytes(&used_bytes, &capacity_bytes, 4404 &prev_live_bytes, &next_live_bytes); 4405 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4406 } else { 4407 type = "OLD"; 4408 } 4409 4410 _total_used_bytes += used_bytes; 4411 _total_capacity_bytes += capacity_bytes; 4412 _total_prev_live_bytes += prev_live_bytes; 4413 _total_next_live_bytes += next_live_bytes; 4414 4415 // Print a line for this particular region. 4416 _out->print_cr(G1PPRL_LINE_PREFIX 4417 G1PPRL_TYPE_FORMAT 4418 G1PPRL_ADDR_BASE_FORMAT 4419 G1PPRL_BYTE_FORMAT 4420 G1PPRL_BYTE_FORMAT 4421 G1PPRL_BYTE_FORMAT 4422 G1PPRL_DOUBLE_FORMAT, 4423 type, bottom, end, 4424 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4425 4426 return false; 4427 } 4428 4429 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4430 // Print the footer of the output. 4431 _out->print_cr(G1PPRL_LINE_PREFIX); 4432 _out->print_cr(G1PPRL_LINE_PREFIX 4433 " SUMMARY" 4434 G1PPRL_SUM_MB_FORMAT("capacity") 4435 G1PPRL_SUM_MB_PERC_FORMAT("used") 4436 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4437 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4438 bytes_to_mb(_total_capacity_bytes), 4439 bytes_to_mb(_total_used_bytes), 4440 perc(_total_used_bytes, _total_capacity_bytes), 4441 bytes_to_mb(_total_prev_live_bytes), 4442 perc(_total_prev_live_bytes, _total_capacity_bytes), 4443 bytes_to_mb(_total_next_live_bytes), 4444 perc(_total_next_live_bytes, _total_capacity_bytes)); 4445 _out->cr(); 4446 }
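// A typical way to drive the closure above (sketch; the actual call
// site is in the marking cleanup code):
//
//   if (G1PrintRegionLivenessInfo) {
//     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//     G1CollectedHeap::heap()->heap_region_iterate(&cl);
//     // constructor prints the header, doHeapRegion() one line per
//     // region, and the destructor the summary footer
//   }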