/*
 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
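// The bit map allots one bit per (1 << _shifter) heap words. A heap address
// maps to a bit offset by subtracting the bitmap's start word and shifting
// right by _shifter; the inverse shifts left and adds the start word back
// (see heapWordToOffset()/offsetToHeapWord() in the header). Disabled
// sketch of the round trip, with hypothetical values, illustrative only:
#if 0
void bitmap_index_sketch(HeapWord* bm_start_word, int shifter) {
  HeapWord* addr   = bm_start_word + 128;                  // some heap address
  size_t    offset = (addr - bm_start_word) >> shifter;    // heapWordToOffset(addr)
  HeapWord* back   = bm_start_word + (offset << shifter);  // offsetToHeapWord(offset)
  assert(back <= addr && addr < back + (1 << shifter),
         "round trip loses only the low _shifter bits");
}
#endif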
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}
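// getAndClearMarkedRegion() returns (and clears) the first contiguous run
// of set bits at or after addr. A caller can therefore consume all marked
// runs in [addr, end) with a simple loop; the disabled sketch below is a
// hypothetical illustration of that protocol, not code used by G1.
#if 0
void sweep_marked_runs_sketch(CMBitMap* bm, HeapWord* addr, HeapWord* end) {
  MemRegion mr = bm->getAndClearMarkedRegion(addr, end);
  while (!mr.is_empty()) {
    // ... process the formerly-marked words in [mr.start(), mr.end()) ...
    mr = bm->getAndClearMarkedRegion(mr.end(), end);
  }
}
#endif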
void CMMarkStack::allocate(size_t size) {
  size_t mark_stack_bytes = size * oopSize;
  size_t aligned_mark_stack_bytes =
    ReservedSpace::allocation_align_size_up(mark_stack_bytes);

  ReservedSpace ms_rs(aligned_mark_stack_bytes);
  if (!ms_rs.is_reserved()) {
    vm_exit_during_initialization("Failed to allocate CM mark stack");
  }
  if (!_virtual_space.initialize(ms_rs, ms_rs.size())) {
    vm_exit_during_initialization("Couldn't reserve backing store "
                                  "for CM mark stack");
  }
  guarantee(_virtual_space.committed_size() == ms_rs.size(),
            "Didn't reserve backing store for "
            "all of concurrent marking stack?");

  _base = (oop*) _virtual_space.low();
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
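// Both par_push() and par_adjoin_arr() follow the same lock-free pattern:
// claim space by advancing _index with a compare-and-swap, and only write
// the payload once the claim has succeeded. The disabled sketch below
// distills that pattern (hypothetical names, Atomic::cmpxchg as used above);
// it is a simplified illustration, not a replacement for the code above.
#if 0
void cas_claim_sketch(volatile jint* top, oop* slots, jint capacity, oop value) {
  while (true) {
    jint cur = *top;
    if (cur >= capacity) return;                    // full: caller must handle overflow
    if (Atomic::cmpxchg(cur + 1, top, cur) == cur) {
      slots[cur] = value;                           // we own slot 'cur' exclusively
      return;
    }
    // lost the race: re-read the top and retry
  }
}
#endif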
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}
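// note_start_of_gc()/note_end_of_gc() bracket an evacuation pause so that
// oops_do() can both fix up the stack entries and verify that the stack's
// size did not change while the world was stopped. A hypothetical caller
// (sketch only; the real call sites live in G1CollectedHeap's evacuation
// code) would look like the disabled block below.
#if 0
void gc_bracketing_sketch(CMMarkStack* stack, OopClosure* adjust_cl) {
  stack->note_start_of_gc();   // snapshot _index
  stack->oops_do(adjust_cl);   // update entries for moved objects
  stack->note_end_of_gc();     // guarantee: nothing was pushed during GC
}
#endif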
HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
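// scale_parallel_threads() targets roughly a quarter of the parallel GC
// threads (rounded, with a floor of one) for concurrent marking. A few
// sample values, shown as a disabled sanity check (illustrative only):
#if 0
void scale_parallel_threads_sketch() {
  assert(ConcurrentMark::scale_parallel_threads(1)  == 1U, "(1 + 2) / 4 == 0, floored to 1");
  assert(ConcurrentMark::scale_parallel_threads(4)  == 1U, "(4 + 2) / 4 == 1");
  assert(ConcurrentMark::scale_parallel_threads(8)  == 2U, "(8 + 2) / 4 == 2");
  assert(ConcurrentMark::scale_parallel_threads(13) == 3U, "(13 + 2) / 4 == 3");
}
#endif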
ConcurrentMark::ConcurrentMark(ReservedSpace heap_rs, uint max_regions) :
  _markBitMap1(heap_rs, MinObjAlignment - 1),
  _markBitMap2(heap_rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  // Calculate the size of the memory that is needed for the backing store
  // (from which we "allocate" the global liveness card bitmap and the
  // liveness card bitmaps for the individual worker threads).

  // Card liveness bitmap size (in bits)
  BitMap::idx_t card_bm_size = (heap_rs.size() + CardTableModRefBS::card_size - 1)
                                >> CardTableModRefBS::card_shift;
  // Card liveness bitmap size (in bytes)
  size_t card_bm_size_bytes = (card_bm_size + (BitsPerByte - 1)) / BitsPerByte;

  // Length of marked bytes array (one element per region)
  size_t marked_bytes_size = max_regions;

  // Size of marked bytes array (in bytes)
  size_t marked_bytes_size_bytes = marked_bytes_size * sizeof(size_t);

  // Now align up to whole number of bitmap words
  assert(sizeof(BitMap::bm_word_t) >= sizeof(size_t), "check this code");
  card_bm_size_bytes = align_size_up(card_bm_size_bytes, sizeof(BitMap::bm_word_t));
  marked_bytes_size_bytes = align_size_up(marked_bytes_size_bytes, sizeof(BitMap::bm_word_t));

  // The memory size is the size of the global card bitmap,
  // plus the combined size of the card bitmaps and marked bytes
  // arrays for each of the worker threads.
  size_t mem_size_bytes = card_bm_size_bytes +
                          _max_task_num * (card_bm_size_bytes + marked_bytes_size_bytes);

  // Align the size up.
  size_t aligned_mem_size_bytes = ReservedSpace::allocation_align_size_up(mem_size_bytes);

  // Reserve the backing store
  ReservedSpace cm_rs(aligned_mem_size_bytes);
  if (!cm_rs.is_reserved()) {
    vm_exit_during_initialization("Failed to allocate CM backing store");
  }
  if (!_virtual_space.initialize(cm_rs, cm_rs.size())) {
    vm_exit_during_initialization("Couldn't reserve CM backing store");
  }
  guarantee(_virtual_space.committed_size() == cm_rs.size(),
            "didn't reserve all of CM backing store?");
  guarantee(_virtual_space.committed_size() >= mem_size_bytes, "sanity");

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;

  char* vaddr = _virtual_space.low();
  size_t incr_bytes = card_bm_size_bytes + marked_bytes_size_bytes;
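  // The backing store is carved up as one card bitmap plus one marked-bytes
  // array per task, followed by the global card bitmap. Worked example of
  // the arithmetic above with hypothetical values (illustrative only):
  // a 1 GB heap, 512-byte cards, 1024 regions and 4 tasks give
  //   card_bm_size            = 1G / 512                   = 2M bits
  //   card_bm_size_bytes      = 2M / 8                     = 256K (word aligned)
  //   marked_bytes_size_bytes = 1024 regions * 8 bytes     = 8K
  //   mem_size_bytes          = 256K + 4 * (256K + 8K)     = 1312K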
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    // "allocate" the card bitmap and live bytes array for current task.
    guarantee(_virtual_space.low() <= vaddr && vaddr < _virtual_space.high(), "oob");
    guarantee((vaddr + incr_bytes) < _virtual_space.high(), "overflow");

    char* card_bm_map_addr = vaddr;
    _count_card_bitmaps[i].set_map((BitMap::bm_word_t*) card_bm_map_addr);
    _count_card_bitmaps[i].set_size((BitMap::idx_t) card_bm_size);

    char* marked_bytes_addr = card_bm_map_addr + card_bm_size_bytes;
    _count_marked_bytes[i] = (size_t*) marked_bytes_addr;

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;

    // Update pointer into virtual memory backing store
    vaddr += incr_bytes;
  }

  // Now "allocate" the global card bitmap.
  guarantee(_virtual_space.low() <= vaddr && vaddr < _virtual_space.high(), "oob");
  guarantee((vaddr + card_bm_size_bytes) <= _virtual_space.high(), "overflow");
  _card_bm.set_map((BitMap::bm_word_t*) vaddr);
  _card_bm.set_size((BitMap::idx_t) card_bm_size);

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads     = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor                 = 0.0;
    _marking_task_overhead        = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set
      _parallel_marking_threads = (uint) ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                              (double) os::processor_count();
      double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (uint) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
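    // Worked example of the G1MarkingOverheadPercent sizing above, with
    // hypothetical values (illustrative only): MaxGCPauseMillis = 200,
    // GCPauseIntervalMillis = 1000, G1MarkingOverheadPercent = 10 and
    // 8 processors give
    //   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
    //   cpu_ratio             = 1 / 8              = 0.125
    //   marking_thread_num    = ceil(0.02 / 0.125) = 1
    //   marking_task_overhead = 0.02 / 1 * 8       = 0.16
    //   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
    // i.e. one marking thread that sleeps roughly 5.25x as long as it runs.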
#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}
void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}
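// clearNextBitmap() illustrates a general pattern for long concurrent
// operations: do the work in fixed-size chunks and call do_yield_check()
// between chunks so the thread can stand aside for a safepoint. The
// disabled sketch below shows the bare pattern with hypothetical names;
// it is illustrative only.
#if 0
void chunked_work_sketch(HeapWord* start, HeapWord* end, size_t chunk_words) {
  for (HeapWord* cur = start; cur < end; ) {
    HeapWord* next = MIN2(cur + chunk_words, end);
    // ... process [cur, next) ...
    cur = next;
    // yield point: lets a pending safepoint (e.g. a young GC) proceed
    // before the next chunk is processed
  }
}
#endif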
class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for a Full GC or an evacuation pause to occur while it
 * waits. This is actually safe, since entering the sync barrier is
 * one of the last things do_marking_step() does, and it doesn't
 * manipulate any data structures afterwards.
 */
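// Both barrier entry points below follow the pattern described above.
// Distilled into a disabled sketch (illustrative only; the real code
// follows immediately, with logging and task-0 cleanup added):
#if 0
void sts_barrier_sketch(WorkGangBarrierSync* barrier, bool is_concurrent) {
  if (is_concurrent) ConcurrentGCThread::stsLeave(); // allow safepoints while blocked
  barrier->enter();                                  // wait for all active tasks
  if (is_concurrent) ConcurrentGCThread::stsJoin();  // re-join the suspendible set
}
#endif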
void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT
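// ForceOverflowSettings is a debug-only testing knob: init() arms it with
// a budget of G1ConcMarkForceOverflow artificial overflows, update()
// consumes one round of that budget, and should_force() fires at most once
// per round. Disabled sketch of the lifecycle (illustrative only; update()
// is actually driven from the first sync barrier above):
#if 0
void force_overflow_sketch(ForceOverflowSettings* fos) {
  fos->init();                 // arm with G1ConcMarkForceOverflow rounds
  if (fos->should_force()) {
    // pretend the global mark stack overflowed, to exercise the
    // overflow / restart machinery in testing
  }
  fos->update();               // consume one round of the budget
}
#endif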
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0,
                               elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};
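// The marking threads pace themselves using sleep_factor(): after a marking
// step that consumed elapsed_vtime_sec of CPU time, a thread sleeps for
// elapsed_vtime_sec * sleep_factor(), which yields the configured duty
// cycle. Worked example with hypothetical numbers (illustrative only):
// with sleep_factor = 5.25 (see the sizing example in the constructor),
// a 10 ms marking step is followed by a 10 * 5.25 = 52.5 ms sleep, i.e.
// the thread is on-CPU roughly 10 / (10 + 52.5) ~= 16% of the time.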
void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (parallel_marking_threads() > 0) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}
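// scanRootRegions() and markFromRoots() are invoked in that order by the
// ConcurrentMarkThread after the initial-mark pause; if remark detects an
// overflow, marking is restarted. Disabled sketch of the driver loop
// (hypothetical shape; the real loop lives in concurrentMarkThread.cpp):
#if 0
void marking_cycle_sketch(ConcurrentMark* cm) {
  cm->scanRootRegions();        // must finish before the next evac pause
  do {
    cm->markFromRoots();        // concurrent marking steps
    // ... remark pause runs checkpointRootsFinal() at a safepoint ...
  } while (cm->restart_for_overflow());
}
#endif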
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}
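// Remark wall time is split into two buckets around weakRefsWork():
// _remark_mark_times gets [start, mark_work_end) and _remark_weak_ref_times
// gets [mark_work_end, now), so the two always sum to _remark_times.
// Worked example with hypothetical timestamps (illustrative only):
// start = 10.000 s, mark_work_end = 10.080 s, now = 10.095 s gives
// mark = 80 ms, weak refs = 15 ms, total remark = 95 ms.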
// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  ConcurrentMark* _cm;
  BitMap* _region_bm;
  BitMap* _card_bm;

  void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
    assert(start_idx <= last_idx, "sanity");

    // Set the inclusive bit range [start_idx, last_idx].
    // For small ranges (up to 8 cards) use a simple loop; otherwise
    // use par_at_put_range.
    if ((last_idx - start_idx) < 8) {
      for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
        _card_bm->par_set_bit(i);
      }
    } else {
      assert(last_idx < _card_bm->size(), "sanity");
      // Note BitMap::par_at_put_range() is exclusive.
      BitMap::idx_t max_idx = MIN2(last_idx+1, _card_bm->size());
      _card_bm->par_at_put_range(start_idx, max_idx, true);
    }
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm):
    _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(cm, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->bottom();

    assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, nextTop, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);

    size_t marked_bytes = 0;

    while (start < nextTop) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_last = start + obj_sz - 1;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);

      // Set the bits in the card BM for this object (inclusive).
      set_card_bitmap_range(start_idx, last_idx);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (nextTop < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
      BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);

      set_card_bitmap_range(start_idx, last_idx);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};
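// card_bitmap_index_for() maps a heap address to a global card index by
// shifting right by CardTableModRefBS::card_shift and subtracting
// _heap_bottom_card_num. Worked example with hypothetical values
// (illustrative only): heap bottom 0x80000000, 512-byte cards
// (card_shift = 9), object at 0x80001400:
//   card index = (0x80001400 >> 9) - (0x80000000 >> 9) = 10
// An object spanning [0x80001400, 0x80001a00) ends on card
// (0x800019ff >> 9) - (0x80000000 >> 9) = 12, so bits 10..12 are set
// inclusively by set_card_bitmap_range().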
// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.
class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _cm(cm),
    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.
    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};
class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_cm,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};
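// Each parallel pass over the heap regions uses a distinct claim value
// (VerifyCountClaimValue above, FinalCountClaimValue below) so that a
// worker's claim in the current pass cannot be confused with a stale claim
// left over from an earlier pass. Disabled sketch of the per-worker pattern
// (illustrative only; hypothetical helper name):
#if 0
void chunked_iterate_sketch(G1CollectedHeap* g1h, HeapRegionClosure* cl,
                            uint worker_id, uint n_workers, jint claim_value) {
  // Workers race to claim regions whose claim differs from claim_value;
  // a region already tagged with claim_value was processed in this pass.
  g1h->heap_region_par_iterate_chunked(cl, worker_id, n_workers, claim_value);
}
#endif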
1594
1595 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1596 public:
1597   FinalCountDataUpdateClosure(ConcurrentMark* cm,
1598                               BitMap* region_bm,
1599                               BitMap* card_bm) :
1600     CMCountDataClosureBase(cm, region_bm, card_bm) { }
1601
1602   bool doHeapRegion(HeapRegion* hr) {
1603
1604     if (hr->continuesHumongous()) {
1605       // We will ignore these here and process them when their
1606       // associated "starts humongous" region is processed (see
1607       // set_bit_for_heap_region()). Note that we cannot rely on their
1608       // associated "starts humongous" region to have their bit set to
1609       // 1 since, due to the region chunking in the parallel region
1610       // iteration, a "continues humongous" region might be visited
1611       // before its associated "starts humongous".
1612       return false;
1613     }
1614
1615     HeapWord* ntams = hr->next_top_at_mark_start();
1616     HeapWord* top = hr->top();
1617
1618     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1619
1620     // Mark the allocated-since-marking portion...
1621     if (ntams < top) {
1622       // This definitely means the region has live objects.
1623       set_bit_for_region(hr);
1624     }
1625
1626     // Now set the bits for [ntams, top]
1627     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1628     // set_card_bitmap_range() expects the last_idx to be within
1629     // the range of the bit map (see assertion in set_card_bitmap_range()),
1630     // so limit it to that range with this application of MIN2.
1631     BitMap::idx_t last_idx = MIN2(_cm->card_bitmap_index_for(top),
1632                                   _card_bm->size()-1);
1633     if (start_idx < _card_bm->size()) {
1634       set_card_bitmap_range(start_idx, last_idx);
1635     } else {
1636       // To reach here start_idx must be beyond the end of
1637       // the bit map and last_idx must have been limited by
1638       // the MIN2().
1639       assert(start_idx == last_idx + 1,
1640              err_msg("Not beyond end start_idx " SIZE_FORMAT " last_idx "
1641                      SIZE_FORMAT, start_idx, last_idx));
1642     }
1643
1644     // Set the bit for the region if it contains live data
1645     if (hr->next_marked_bytes() > 0) {
1646       set_bit_for_region(hr);
1647     }
1648
1649     return false;
1650   }
1651 };
1652
1653 class G1ParFinalCountTask: public AbstractGangTask {
1654 protected:
1655   G1CollectedHeap* _g1h;
1656   ConcurrentMark* _cm;
1657   BitMap* _actual_region_bm;
1658   BitMap* _actual_card_bm;
1659
1660   uint _n_workers;
1661
1662 public:
1663   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1664     : AbstractGangTask("G1 final counting"),
1665       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1666       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1667       _n_workers(0) {
1668     // Use the value already set as the number of active threads
1669     // in the call to run_task().
1670     if (G1CollectedHeap::use_parallel_gc_threads()) {
1671       assert( _g1h->workers()->active_workers() > 0,
1672         "Should have been previously set");
1673       _n_workers = _g1h->workers()->active_workers();
1674     } else {
1675       _n_workers = 1;
1676     }
1677   }
1678
1679   void work(uint worker_id) {
1680     assert(worker_id < _n_workers, "invariant");
1681
1682     FinalCountDataUpdateClosure final_update_cl(_cm,
1683                                                 _actual_region_bm,
1684                                                 _actual_card_bm);
1685
1686     if (G1CollectedHeap::use_parallel_gc_threads()) {
1687       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1688                                             worker_id,
1689                                             _n_workers,
1690                                             HeapRegion::FinalCountClaimValue);
1691     } else {
1692       _g1h->heap_region_iterate(&final_update_cl);
1693     }
1694   }
1695 };
1696
1697 class G1ParNoteEndTask;
1698
1699 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1700   G1CollectedHeap* _g1;
1701   int _worker_num;
1702   size_t _max_live_bytes;
1703   uint _regions_claimed;
1704   size_t _freed_bytes;
1705   FreeRegionList* _local_cleanup_list;
1706   OldRegionSet* _old_proxy_set;
1707   HumongousRegionSet* _humongous_proxy_set;
1708   HRRSCleanupTask* _hrrs_cleanup_task;
1709   double _claimed_region_time;
1710   double _max_region_time;
1711
1712 public:
1713   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1714                              int worker_num,
1715                              FreeRegionList* local_cleanup_list,
1716                              OldRegionSet* old_proxy_set,
1717                              HumongousRegionSet* humongous_proxy_set,
1718                              HRRSCleanupTask* hrrs_cleanup_task) :
1719     _g1(g1), _worker_num(worker_num),
1720     _max_live_bytes(0), _regions_claimed(0),
1721     _freed_bytes(0),
1722     _claimed_region_time(0.0), _max_region_time(0.0),
1723     _local_cleanup_list(local_cleanup_list),
1724     _old_proxy_set(old_proxy_set),
1725     _humongous_proxy_set(humongous_proxy_set),
1726     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1727
1728   size_t freed_bytes() { return _freed_bytes; }
1729
1730   bool doHeapRegion(HeapRegion *hr) {
1731     if (hr->continuesHumongous()) {
1732       return false;
1733     }
1734     // Regions were claimed with HeapRegion::FinalCountClaimValue in the FinalCount
1735     // task; this iteration re-claims them with HeapRegion::NoteEndClaimValue.
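// [Editor's note] Claim values in one sketch (illustrative, not the HotSpot
// implementation): each region carries an integer tag, and a parallel
// iteration "claims" a region by atomically bumping the tag from the
// previous phase's value to the current one, so exactly one worker
// processes each region per phase:
//
//   bool claim_region(HeapRegion* r, jint prev_cv, jint new_cv) {
//     return Atomic::cmpxchg(new_cv, r->claim_value_addr(), prev_cv) == prev_cv;
//   }
//
// claim_value_addr() is a hypothetical accessor; the real code goes through
// HeapRegion's claim API and is cross-checked by the
// check_heap_region_claim_values() asserts in ConcurrentMark::cleanup().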
1736     _g1->reset_gc_time_stamps(hr);
1737     double start = os::elapsedTime();
1738     _regions_claimed++;
1739     hr->note_end_of_marking();
1740     _max_live_bytes += hr->max_live_bytes();
1741     _g1->free_region_if_empty(hr,
1742                               &_freed_bytes,
1743                               _local_cleanup_list,
1744                               _old_proxy_set,
1745                               _humongous_proxy_set,
1746                               _hrrs_cleanup_task,
1747                               true /* par */);
1748     double region_time = (os::elapsedTime() - start);
1749     _claimed_region_time += region_time;
1750     if (region_time > _max_region_time) {
1751       _max_region_time = region_time;
1752     }
1753     return false;
1754   }
1755
1756   size_t max_live_bytes() { return _max_live_bytes; }
1757   uint regions_claimed() { return _regions_claimed; }
1758   double claimed_region_time_sec() { return _claimed_region_time; }
1759   double max_region_time_sec() { return _max_region_time; }
1760 };
1761
1762 class G1ParNoteEndTask: public AbstractGangTask {
1763   friend class G1NoteEndOfConcMarkClosure;
1764
1765 protected:
1766   G1CollectedHeap* _g1h;
1767   size_t _max_live_bytes;
1768   size_t _freed_bytes;
1769   FreeRegionList* _cleanup_list;
1770
1771 public:
1772   G1ParNoteEndTask(G1CollectedHeap* g1h,
1773                    FreeRegionList* cleanup_list) :
1774     AbstractGangTask("G1 note end"), _g1h(g1h),
1775     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1776
1777   void work(uint worker_id) {
1778     double start = os::elapsedTime();
1779     FreeRegionList local_cleanup_list("Local Cleanup List");
1780     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1781     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1782     HRRSCleanupTask hrrs_cleanup_task;
1783     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1784                                            &old_proxy_set,
1785                                            &humongous_proxy_set,
1786                                            &hrrs_cleanup_task);
1787     if (G1CollectedHeap::use_parallel_gc_threads()) {
1788       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1789                                             _g1h->workers()->active_workers(),
1790                                             HeapRegion::NoteEndClaimValue);
1791     } else {
1792       _g1h->heap_region_iterate(&g1_note_end);
1793     }
1794     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1795
1796     // Now update the lists
1797     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1798                                             NULL /* free_list */,
1799                                             &old_proxy_set,
1800                                             &humongous_proxy_set,
1801                                             true /* par */);
1802     {
1803       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1804       _max_live_bytes += g1_note_end.max_live_bytes();
1805       _freed_bytes += g1_note_end.freed_bytes();
1806
1807       // If we iterate over the global cleanup list at the end of
1808       // cleanup to do this printing we cannot guarantee that we only
1809       // generate output for the newly-reclaimed regions (the list
1810       // might not be empty at the beginning of cleanup; we might
1811       // still be working on its previous contents). So we do the
1812       // printing here, before we append the new regions to the global
1813       // cleanup list.
1814 1815 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1816 if (hr_printer->is_active()) { 1817 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1818 while (iter.more_available()) { 1819 HeapRegion* hr = iter.get_next(); 1820 hr_printer->cleanup(hr); 1821 } 1822 } 1823 1824 _cleanup_list->add_as_tail(&local_cleanup_list); 1825 assert(local_cleanup_list.is_empty(), "post-condition"); 1826 1827 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1828 } 1829 } 1830 size_t max_live_bytes() { return _max_live_bytes; } 1831 size_t freed_bytes() { return _freed_bytes; } 1832 }; 1833 1834 class G1ParScrubRemSetTask: public AbstractGangTask { 1835 protected: 1836 G1RemSet* _g1rs; 1837 BitMap* _region_bm; 1838 BitMap* _card_bm; 1839 public: 1840 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1841 BitMap* region_bm, BitMap* card_bm) : 1842 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1843 _region_bm(region_bm), _card_bm(card_bm) { } 1844 1845 void work(uint worker_id) { 1846 if (G1CollectedHeap::use_parallel_gc_threads()) { 1847 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1848 HeapRegion::ScrubRemSetClaimValue); 1849 } else { 1850 _g1rs->scrub(_region_bm, _card_bm); 1851 } 1852 } 1853 1854 }; 1855 1856 void ConcurrentMark::cleanup() { 1857 // world is stopped at this checkpoint 1858 assert(SafepointSynchronize::is_at_safepoint(), 1859 "world should be stopped"); 1860 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1861 1862 // If a full collection has happened, we shouldn't do this. 1863 if (has_aborted()) { 1864 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1865 return; 1866 } 1867 1868 HRSPhaseSetter x(HRSPhaseCleanup); 1869 g1h->verify_region_sets_optional(); 1870 1871 if (VerifyDuringGC) { 1872 HandleMark hm; // handle scope 1873 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1874 Universe::heap()->prepare_for_verify(); 1875 Universe::verify(/* silent */ false, 1876 /* option */ VerifyOption_G1UsePrevMarking); 1877 } 1878 1879 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1880 g1p->record_concurrent_mark_cleanup_start(); 1881 1882 double start = os::elapsedTime(); 1883 1884 HeapRegionRemSet::reset_for_cleanup_tasks(); 1885 1886 uint n_workers; 1887 1888 // Do counting once more with the world stopped for good measure. 1889 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1890 1891 if (G1CollectedHeap::use_parallel_gc_threads()) { 1892 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1893 "sanity check"); 1894 1895 g1h->set_par_threads(); 1896 n_workers = g1h->n_par_threads(); 1897 assert(g1h->n_par_threads() == n_workers, 1898 "Should not have been reset"); 1899 g1h->workers()->run_task(&g1_par_count_task); 1900 // Done with the parallel phase so reset to 0. 1901 g1h->set_par_threads(0); 1902 1903 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1904 "sanity check"); 1905 } else { 1906 n_workers = 1; 1907 g1_par_count_task.work(0); 1908 } 1909 1910 if (VerifyDuringGC) { 1911 // Verify that the counting data accumulated during marking matches 1912 // that calculated by walking the marking bitmap. 
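// [Editor's note] "Expected" below means recomputed from scratch: the verify
// task walks the next marking bitmap (via VerifyLiveObjectDataHRClosure and
// its embedded CalcLiveObjectsClosure) to rebuild region and card liveness
// into the two temporary bitmaps allocated next, and then compares them
// bit-for-bit against the _region_bm/_card_bm data that marking accumulated
// concurrently.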
1913
1914     // Bitmaps to hold expected values
1915     BitMap expected_region_bm(_region_bm.size(), false);
1916     BitMap expected_card_bm(_card_bm.size(), false);
1917
1918     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1919                                                  &_region_bm,
1920                                                  &_card_bm,
1921                                                  &expected_region_bm,
1922                                                  &expected_card_bm);
1923
1924     if (G1CollectedHeap::use_parallel_gc_threads()) {
1925       g1h->set_par_threads((int)n_workers);
1926       g1h->workers()->run_task(&g1_par_verify_task);
1927       // Done with the parallel phase so reset to 0.
1928       g1h->set_par_threads(0);
1929
1930       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1931              "sanity check");
1932     } else {
1933       g1_par_verify_task.work(0);
1934     }
1935
1936     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1937   }
1938
1939   size_t start_used_bytes = g1h->used();
1940   g1h->set_marking_complete();
1941
1942   double count_end = os::elapsedTime();
1943   double this_final_counting_time = (count_end - start);
1944   _total_counting_time += this_final_counting_time;
1945
1946   if (G1PrintRegionLivenessInfo) {
1947     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1948     _g1h->heap_region_iterate(&cl);
1949   }
1950
1951   // Install newly created mark bitmap as "prev".
1952   swapMarkBitMaps();
1953
1954   g1h->reset_gc_time_stamp();
1955
1956   // Note end of marking in all heap regions.
1957   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1958   if (G1CollectedHeap::use_parallel_gc_threads()) {
1959     g1h->set_par_threads((int)n_workers);
1960     g1h->workers()->run_task(&g1_par_note_end_task);
1961     g1h->set_par_threads(0);
1962
1963     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1964            "sanity check");
1965   } else {
1966     g1_par_note_end_task.work(0);
1967   }
1968   g1h->check_gc_time_stamps();
1969
1970   if (!cleanup_list_is_empty()) {
1971     // The cleanup list is not empty, so we'll have to process it
1972     // concurrently. Notify anyone else that might be wanting free
1973     // regions that there will be more free regions coming soon.
1974     g1h->set_free_regions_coming();
1975   }
1976
1977   // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
1978   // call below, since it affects the metric by which we sort the heap regions.
1979   if (G1ScrubRemSets) {
1980     double rs_scrub_start = os::elapsedTime();
1981     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1982     if (G1CollectedHeap::use_parallel_gc_threads()) {
1983       g1h->set_par_threads((int)n_workers);
1984       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1985       g1h->set_par_threads(0);
1986
1987       assert(g1h->check_heap_region_claim_values(
1988                                             HeapRegion::ScrubRemSetClaimValue),
1989              "sanity check");
1990     } else {
1991       g1_par_scrub_rs_task.work(0);
1992     }
1993
1994     double rs_scrub_end = os::elapsedTime();
1995     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1996     _total_rs_scrub_time += this_rs_scrub_time;
1997   }
1998
1999   // this will also free any regions totally full of garbage objects,
2000   // and sort the regions.
2001   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2002
2003   // Statistics.
2004   double end = os::elapsedTime();
2005   _cleanup_times.add((end - start) * 1000.0);
2006
2007   if (G1Log::fine()) {
2008     g1h->print_size_transition(gclog_or_tty,
2009                                start_used_bytes,
2010                                g1h->used(),
2011                                g1h->capacity());
2012   }
2013
2014   // Clean up will have freed any regions completely full of garbage.
2015   // Update the soft reference policy with the new heap occupancy.
2016   Universe::update_heap_info_at_gc();
2017
2018   // We need to make this be a "collection" so any collection pause that
2019   // races with it goes around and waits for completeCleanup to finish.
2020   g1h->increment_total_collections();
2021
2022   // We reclaimed old regions so we should calculate the sizes to make
2023   // sure we update the old gen/space data.
2024   g1h->g1mm()->update_sizes();
2025
2026   if (VerifyDuringGC) {
2027     HandleMark hm;  // handle scope
2028     gclog_or_tty->print(" VerifyDuringGC:(after)");
2029     Universe::heap()->prepare_for_verify();
2030     Universe::verify(/* silent */ false,
2031                      /* option */ VerifyOption_G1UsePrevMarking);
2032   }
2033
2034   g1h->verify_region_sets_optional();
2035 }
2036
2037 void ConcurrentMark::completeCleanup() {
2038   if (has_aborted()) return;
2039
2040   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2041
2042   _cleanup_list.verify_optional();
2043   FreeRegionList tmp_free_list("Tmp Free List");
2044
2045   if (G1ConcRegionFreeingVerbose) {
2046     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2047                            "cleanup list has %u entries",
2048                            _cleanup_list.length());
2049   }
2050
2051   // No one else should be accessing the _cleanup_list at this point,
2052   // so it's not necessary to take any locks
2053   while (!_cleanup_list.is_empty()) {
2054     HeapRegion* hr = _cleanup_list.remove_head();
2055     assert(hr != NULL, "the list was not empty");
2056     hr->par_clear();
2057     tmp_free_list.add_as_tail(hr);
2058
2059     // Instead of adding one region at a time to the secondary_free_list,
2060     // we accumulate them in the local list and move them a few at a
2061     // time. This also cuts down on the number of notify_all() calls
2062     // we do during this process. We'll also append the local list when
2063     // _cleanup_list is empty (which means we just removed the last
2064     // region from the _cleanup_list).
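// [Editor's note] The batching idiom above in miniature (illustrative C++
// only, with std::list/std::mutex standing in for HotSpot's region lists
// and Monitor; kAppendLen plays the role of G1SecondaryFreeListAppendLength):
//
//   std::list<HeapRegion*> local;              // thread-private staging
//   local.push_back(hr);                       // no lock needed
//   if (local.size() % kAppendLen == 0 || source_is_empty) {
//     std::lock_guard<std::mutex> g(secondary_free_list_mutex);
//     global.splice(global.end(), local);      // one lock per batch
//     secondary_free_list_cv.notify_all();     // one wakeup per batch
//   }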
2065     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2066         _cleanup_list.is_empty()) {
2067       if (G1ConcRegionFreeingVerbose) {
2068         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2069                                "appending %u entries to the secondary_free_list, "
2070                                "cleanup list still has %u entries",
2071                                tmp_free_list.length(),
2072                                _cleanup_list.length());
2073       }
2074
2075       {
2076         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2077         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2078         SecondaryFreeList_lock->notify_all();
2079       }
2080
2081       if (G1StressConcRegionFreeing) {
2082         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2083           os::sleep(Thread::current(), (jlong) 1, false);
2084         }
2085       }
2086     }
2087   }
2088   assert(tmp_free_list.is_empty(), "post-condition");
2089 }
2090
2091 // Support closures for reference processing in G1
2092
2093 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2094   HeapWord* addr = (HeapWord*)obj;
2095   return addr != NULL &&
2096          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2097 }
2098
2099 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2100   G1CollectedHeap* _g1;
2101   ConcurrentMark* _cm;
2102 public:
2103   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2104     _g1(g1), _cm(cm) {
2105     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2106   }
2107
2108   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2109   virtual void do_oop(      oop* p) { do_oop_work(p); }
2110
2111   template <class T> void do_oop_work(T* p) {
2112     oop obj = oopDesc::load_decode_heap_oop(p);
2113     HeapWord* addr = (HeapWord*)obj;
2114
2115     if (_cm->verbose_high()) {
2116       gclog_or_tty->print_cr("\t[0] we're looking at location "
2117                              "*"PTR_FORMAT" = "PTR_FORMAT,
2118                              p, (void*) obj);
2119     }
2120
2121     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2122       _cm->mark_and_count(obj);
2123       _cm->mark_stack_push(obj);
2124     }
2125   }
2126 };
2127
2128 class G1CMDrainMarkingStackClosure: public VoidClosure {
2129   ConcurrentMark* _cm;
2130   CMMarkStack* _markStack;
2131   G1CMKeepAliveClosure* _oopClosure;
2132 public:
2133   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2134                                G1CMKeepAliveClosure* oopClosure) :
2135     _cm(cm),
2136     _markStack(markStack),
2137     _oopClosure(oopClosure) { }
2138
2139   void do_void() {
2140     _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2141   }
2142 };
2143
2144 // 'Keep Alive' closure used by parallel reference processing.
2145 // An instance of this closure is used in the parallel reference processing
2146 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2147 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2148 // placed onto discovered ref lists only once, so we can mark and push with no
2149 // need to check whether the object has already been marked. Using the
2150 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2151 // operating on the global mark stack. This means that an individual
2152 // worker would be doing lock-free pushes while it processes its own
2153 // discovered ref list followed by a drain call. If the discovered ref lists
2154 // are unbalanced then this could cause interference with the other
2155 // workers. Using a CMTask (and its embedded local data structures)
2156 // avoids that potential interference.
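// [Editor's note] The shape of the pattern, in simplified form (names and
// the loop driver are illustrative only; in reality the reference processor
// invokes the closure below once per discovered reference):
//
//   int budget = G1RefProcDrainInterval;
//   while (oop ref = next_discovered_reference()) {  // hypothetical helper
//     task->deal_with_reference(ref);                // task-local push
//     if (--budget == 0) {
//       // Drain what those references made reachable before continuing.
//       do {
//         task->do_marking_step(G1ConcMarkStepDurationMillis,
//                               false /* do_stealing */,
//                               false /* do_termination */);
//       } while (task->has_aborted() && !cm->has_overflown());
//       budget = G1RefProcDrainInterval;
//     }
//   }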
2157 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2158   ConcurrentMark* _cm;
2159   CMTask* _task;
2160   int _ref_counter_limit;
2161   int _ref_counter;
2162 public:
2163   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2164     _cm(cm), _task(task),
2165     _ref_counter_limit(G1RefProcDrainInterval) {
2166     assert(_ref_counter_limit > 0, "sanity");
2167     _ref_counter = _ref_counter_limit;
2168   }
2169
2170   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2171   virtual void do_oop(      oop* p) { do_oop_work(p); }
2172
2173   template <class T> void do_oop_work(T* p) {
2174     if (!_cm->has_overflown()) {
2175       oop obj = oopDesc::load_decode_heap_oop(p);
2176       if (_cm->verbose_high()) {
2177         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2178                                "*"PTR_FORMAT" = "PTR_FORMAT,
2179                                _task->task_id(), p, (void*) obj);
2180       }
2181
2182       _task->deal_with_reference(obj);
2183       _ref_counter--;
2184
2185       if (_ref_counter == 0) {
2186         // We have dealt with _ref_counter_limit references, pushing them and objects
2187         // reachable from them on to the local stack (and possibly the global stack).
2188         // Call do_marking_step() to process these entries. We call the routine in a
2189         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2190         // with the entries that we've pushed as a result of the deal_with_reference
2191         // calls above) or we overflow.
2192         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2193         // while there may still be some work to do. (See the comment at the
2194         // beginning of CMTask::do_marking_step() for those conditions - one of which
2195         // is reaching the specified time target.) It is only when
2196         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2197         // that the marking has completed.
2198         do {
2199           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2200           _task->do_marking_step(mark_step_duration_ms,
2201                                  false /* do_stealing    */,
2202                                  false /* do_termination */);
2203         } while (_task->has_aborted() && !_cm->has_overflown());
2204         _ref_counter = _ref_counter_limit;
2205       }
2206     } else {
2207       if (_cm->verbose_high()) {
2208         gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2209       }
2210     }
2211   }
2212 };
2213
2214 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2215   ConcurrentMark* _cm;
2216   CMTask* _task;
2217 public:
2218   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2219     _cm(cm), _task(task) { }
2220
2221   void do_void() {
2222     do {
2223       if (_cm->verbose_high()) {
2224         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2225                                _task->task_id());
2226       }
2227
2228       // We call CMTask::do_marking_step() to completely drain the local and
2229       // global marking stacks. The routine is called in a loop, which we'll
2230       // exit if there's nothing more to do (i.e. we've completely drained the
2231       // entries that were pushed as a result of applying the
2232       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2233       // lists above) or we overflow the global marking stack.
2234       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2235       // while there may still be some work to do. (See the comment at the
2236       // beginning of CMTask::do_marking_step() for those conditions - one of which
2237       // is reaching the specified time target.) It is only when
2238       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2239       // that the marking has completed.
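// [Editor's note] The very large time target passed below effectively
// disables do_marking_step()'s time-based abort, so each iteration runs
// until the stacks are fully drained, the global stack overflows, or
// another abort condition fires; contrast the bounded
// G1ConcMarkStepDurationMillis target used in the keep-alive closure above.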
2240 2241 _task->do_marking_step(1000000000.0 /* something very large */, 2242 true /* do_stealing */, 2243 true /* do_termination */); 2244 } while (_task->has_aborted() && !_cm->has_overflown()); 2245 } 2246 }; 2247 2248 // Implementation of AbstractRefProcTaskExecutor for parallel 2249 // reference processing at the end of G1 concurrent marking 2250 2251 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2252 private: 2253 G1CollectedHeap* _g1h; 2254 ConcurrentMark* _cm; 2255 WorkGang* _workers; 2256 int _active_workers; 2257 2258 public: 2259 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2260 ConcurrentMark* cm, 2261 WorkGang* workers, 2262 int n_workers) : 2263 _g1h(g1h), _cm(cm), 2264 _workers(workers), _active_workers(n_workers) { } 2265 2266 // Executes the given task using concurrent marking worker threads. 2267 virtual void execute(ProcessTask& task); 2268 virtual void execute(EnqueueTask& task); 2269 }; 2270 2271 class G1CMRefProcTaskProxy: public AbstractGangTask { 2272 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2273 ProcessTask& _proc_task; 2274 G1CollectedHeap* _g1h; 2275 ConcurrentMark* _cm; 2276 2277 public: 2278 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2279 G1CollectedHeap* g1h, 2280 ConcurrentMark* cm) : 2281 AbstractGangTask("Process reference objects in parallel"), 2282 _proc_task(proc_task), _g1h(g1h), _cm(cm) { } 2283 2284 virtual void work(uint worker_id) { 2285 CMTask* marking_task = _cm->task(worker_id); 2286 G1CMIsAliveClosure g1_is_alive(_g1h); 2287 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task); 2288 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2289 2290 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2291 } 2292 }; 2293 2294 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2295 assert(_workers != NULL, "Need parallel worker threads."); 2296 2297 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2298 2299 // We need to reset the phase for each task execution so that 2300 // the termination protocol of CMTask::do_marking_step works. 2301 _cm->set_phase(_active_workers, false /* concurrent */); 2302 _g1h->set_par_threads(_active_workers); 2303 _workers->run_task(&proc_task_proxy); 2304 _g1h->set_par_threads(0); 2305 } 2306 2307 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2308 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2309 EnqueueTask& _enq_task; 2310 2311 public: 2312 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2313 AbstractGangTask("Enqueue reference objects in parallel"), 2314 _enq_task(enq_task) { } 2315 2316 virtual void work(uint worker_id) { 2317 _enq_task.work(worker_id); 2318 } 2319 }; 2320 2321 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2322 assert(_workers != NULL, "Need parallel worker threads."); 2323 2324 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2325 2326 _g1h->set_par_threads(_active_workers); 2327 _workers->run_task(&enq_task_proxy); 2328 _g1h->set_par_threads(0); 2329 } 2330 2331 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2332 ResourceMark rm; 2333 HandleMark hm; 2334 2335 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2336 2337 // Is alive closure. 2338 G1CMIsAliveClosure g1_is_alive(g1h); 2339 2340 // Inner scope to exclude the cleaning of the string and symbol 2341 // tables from the displayed time. 
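// [Editor's note] The braces that follow bound the lifetime of the
// TraceTime("GC ref-proc") timer declared inside them; StringTable and
// SymbolTable unlinking happen after the scope closes, so they are not
// charged to the reported ref-proc time.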
2342   {
2343     if (G1Log::finer()) {
2344       gclog_or_tty->put(' ');
2345     }
2346     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2347
2348     ReferenceProcessor* rp = g1h->ref_processor_cm();
2349
2350     // See the comment in G1CollectedHeap::ref_processing_init()
2351     // about how reference processing currently works in G1.
2352
2353     // Process weak references.
2354     rp->setup_policy(clear_all_soft_refs);
2355     assert(_markStack.isEmpty(), "mark stack should be empty");
2356
2357     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2358     G1CMDrainMarkingStackClosure
2359       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2360
2361     // We use the work gang from the G1CollectedHeap and we utilize all
2362     // the worker threads.
2363     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2364     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2365
2366     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2367                                               g1h->workers(), active_workers);
2368
2369     if (rp->processing_is_mt()) {
2370       // Set the degree of MT here. If the discovery is done MT, there
2371       // may have been a different number of threads doing the discovery
2372       // and a different number of discovered lists may have Ref objects.
2373       // That is OK as long as the Reference lists are balanced (see
2374       // balance_all_queues() and balance_queues()).
2375       rp->set_active_mt_degree(active_workers);
2376
2377       rp->process_discovered_references(&g1_is_alive,
2378                                         &g1_keep_alive,
2379                                         &g1_drain_mark_stack,
2380                                         &par_task_executor);
2381
2382       // The work routines of the parallel keep_alive and drain_marking_stack
2383       // will set the has_overflown flag if we overflow the global marking
2384       // stack.
2385     } else {
2386       rp->process_discovered_references(&g1_is_alive,
2387                                         &g1_keep_alive,
2388                                         &g1_drain_mark_stack,
2389                                         NULL);
2390     }
2391
2392     assert(_markStack.overflow() || _markStack.isEmpty(),
2393            "mark stack should be empty (unless it overflowed)");
2394     if (_markStack.overflow()) {
2395       // Should have been done already when we tried to push an
2396       // entry on to the global mark stack. But let's do it again.
2397       set_has_overflown();
2398     }
2399
2400     if (rp->processing_is_mt()) {
2401       assert(rp->num_q() == active_workers, "why not");
2402       rp->enqueue_discovered_references(&par_task_executor);
2403     } else {
2404       rp->enqueue_discovered_references();
2405     }
2406
2407     rp->verify_no_references_recorded();
2408     assert(!rp->discovery_enabled(), "Post condition");
2409   }
2410
2411   // Now clean up stale oops in StringTable
2412   StringTable::unlink(&g1_is_alive);
2413   // Clean up unreferenced symbols in symbol table.
2414   SymbolTable::unlink();
2415 }
2416
2417 void ConcurrentMark::swapMarkBitMaps() {
2418   CMBitMapRO* temp = _prevMarkBitMap;
2419   _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2420   _nextMarkBitMap = (CMBitMap*) temp;
2421 }
2422
2423 class CMRemarkTask: public AbstractGangTask {
2424 private:
2425   ConcurrentMark *_cm;
2426
2427 public:
2428   void work(uint worker_id) {
2429     // Since all available tasks are actually started, we should
2430     // only proceed if we're supposed to be active.
2431     if (worker_id < _cm->active_tasks()) {
2432       CMTask* task = _cm->task(worker_id);
2433       task->record_start_time();
2434       do {
2435         task->do_marking_step(1000000000.0 /* something very large */,
2436                               true /* do_stealing    */,
2437                               true /* do_termination */);
2438       } while (task->has_aborted() && !_cm->has_overflown());
2439       // If we overflow, then we do not want to restart. We instead
2440       // want to abort remark and do concurrent marking again.
2441       task->record_end_time();
2442     }
2443   }
2444
2445   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2446     AbstractGangTask("Par Remark"), _cm(cm) {
2447     _cm->terminator()->reset_for_reuse(active_workers);
2448   }
2449 };
2450
2451 void ConcurrentMark::checkpointRootsFinalWork() {
2452   ResourceMark rm;
2453   HandleMark hm;
2454   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2455
2456   g1h->ensure_parsability(false);
2457
2458   if (G1CollectedHeap::use_parallel_gc_threads()) {
2459     G1CollectedHeap::StrongRootsScope srs(g1h);
2460     // this is remark, so we'll use up all active threads
2461     uint active_workers = g1h->workers()->active_workers();
2462     if (active_workers == 0) {
2463       assert(active_workers > 0, "Should have been set earlier");
2464       active_workers = (uint) ParallelGCThreads;
2465       g1h->workers()->set_active_workers(active_workers);
2466     }
2467     set_phase(active_workers, false /* concurrent */);
2468     // Leave _parallel_marking_threads at its
2469     // value originally calculated in the ConcurrentMark
2470     // constructor and pass values of the active workers
2471     // through the gang in the task.
2472
2473     CMRemarkTask remarkTask(this, active_workers);
2474     g1h->set_par_threads(active_workers);
2475     g1h->workers()->run_task(&remarkTask);
2476     g1h->set_par_threads(0);
2477   } else {
2478     G1CollectedHeap::StrongRootsScope srs(g1h);
2479     // this is remark, so we'll use up all available threads
2480     uint active_workers = 1;
2481     set_phase(active_workers, false /* concurrent */);
2482
2483     CMRemarkTask remarkTask(this, active_workers);
2484     // We will start all available threads, even if we decide that the
2485     // active_workers will be fewer. The extra ones will just bail out
2486     // immediately.
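// [Editor's note] In this serial fallback the work gang is bypassed
// entirely: the work(0) call below runs the remark task inline on the VM
// thread, and it is the worker_id < active_tasks() guard in
// CMRemarkTask::work() that lets any surplus started threads bail out
// immediately in the parallel case.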
2487 remarkTask.work(0); 2488 } 2489 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2490 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2491 2492 print_stats(); 2493 2494 #if VERIFY_OBJS_PROCESSED 2495 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2496 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2497 _scan_obj_cl.objs_processed, 2498 ThreadLocalObjQueue::objs_enqueued); 2499 guarantee(_scan_obj_cl.objs_processed == 2500 ThreadLocalObjQueue::objs_enqueued, 2501 "Different number of objs processed and enqueued."); 2502 } 2503 #endif 2504 } 2505 2506 #ifndef PRODUCT 2507 2508 class PrintReachableOopClosure: public OopClosure { 2509 private: 2510 G1CollectedHeap* _g1h; 2511 outputStream* _out; 2512 VerifyOption _vo; 2513 bool _all; 2514 2515 public: 2516 PrintReachableOopClosure(outputStream* out, 2517 VerifyOption vo, 2518 bool all) : 2519 _g1h(G1CollectedHeap::heap()), 2520 _out(out), _vo(vo), _all(all) { } 2521 2522 void do_oop(narrowOop* p) { do_oop_work(p); } 2523 void do_oop( oop* p) { do_oop_work(p); } 2524 2525 template <class T> void do_oop_work(T* p) { 2526 oop obj = oopDesc::load_decode_heap_oop(p); 2527 const char* str = NULL; 2528 const char* str2 = ""; 2529 2530 if (obj == NULL) { 2531 str = ""; 2532 } else if (!_g1h->is_in_g1_reserved(obj)) { 2533 str = " O"; 2534 } else { 2535 HeapRegion* hr = _g1h->heap_region_containing(obj); 2536 guarantee(hr != NULL, "invariant"); 2537 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2538 bool marked = _g1h->is_marked(obj, _vo); 2539 2540 if (over_tams) { 2541 str = " >"; 2542 if (marked) { 2543 str2 = " AND MARKED"; 2544 } 2545 } else if (marked) { 2546 str = " M"; 2547 } else { 2548 str = " NOT"; 2549 } 2550 } 2551 2552 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2553 p, (void*) obj, str, str2); 2554 } 2555 }; 2556 2557 class PrintReachableObjectClosure : public ObjectClosure { 2558 private: 2559 G1CollectedHeap* _g1h; 2560 outputStream* _out; 2561 VerifyOption _vo; 2562 bool _all; 2563 HeapRegion* _hr; 2564 2565 public: 2566 PrintReachableObjectClosure(outputStream* out, 2567 VerifyOption vo, 2568 bool all, 2569 HeapRegion* hr) : 2570 _g1h(G1CollectedHeap::heap()), 2571 _out(out), _vo(vo), _all(all), _hr(hr) { } 2572 2573 void do_object(oop o) { 2574 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2575 bool marked = _g1h->is_marked(o, _vo); 2576 bool print_it = _all || over_tams || marked; 2577 2578 if (print_it) { 2579 _out->print_cr(" "PTR_FORMAT"%s", 2580 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2581 PrintReachableOopClosure oopCl(_out, _vo, _all); 2582 o->oop_iterate_no_header(&oopCl); 2583 } 2584 } 2585 }; 2586 2587 class PrintReachableRegionClosure : public HeapRegionClosure { 2588 private: 2589 G1CollectedHeap* _g1h; 2590 outputStream* _out; 2591 VerifyOption _vo; 2592 bool _all; 2593 2594 public: 2595 bool doHeapRegion(HeapRegion* hr) { 2596 HeapWord* b = hr->bottom(); 2597 HeapWord* e = hr->end(); 2598 HeapWord* t = hr->top(); 2599 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2600 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2601 "TAMS: "PTR_FORMAT, b, e, t, p); 2602 _out->cr(); 2603 2604 HeapWord* from = b; 2605 HeapWord* to = t; 2606 2607 if (to > from) { 2608 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2609 _out->cr(); 2610 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2611 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2612 _out->cr(); 2613 } 2614 2615 return false; 2616 } 2617 2618 PrintReachableRegionClosure(outputStream* out, 2619 VerifyOption vo, 2620 bool all) : 2621 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2622 }; 2623 2624 void ConcurrentMark::print_reachable(const char* str, 2625 VerifyOption vo, 2626 bool all) { 2627 gclog_or_tty->cr(); 2628 gclog_or_tty->print_cr("== Doing heap dump... "); 2629 2630 if (G1PrintReachableBaseFile == NULL) { 2631 gclog_or_tty->print_cr(" #### error: no base file defined"); 2632 return; 2633 } 2634 2635 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2636 (JVM_MAXPATHLEN - 1)) { 2637 gclog_or_tty->print_cr(" #### error: file name too long"); 2638 return; 2639 } 2640 2641 char file_name[JVM_MAXPATHLEN]; 2642 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2643 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2644 2645 fileStream fout(file_name); 2646 if (!fout.is_open()) { 2647 gclog_or_tty->print_cr(" #### error: could not open file"); 2648 return; 2649 } 2650 2651 outputStream* out = &fout; 2652 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2653 out->cr(); 2654 2655 out->print_cr("--- ITERATING OVER REGIONS"); 2656 out->cr(); 2657 PrintReachableRegionClosure rcl(out, vo, all); 2658 _g1h->heap_region_iterate(&rcl); 2659 out->cr(); 2660 2661 gclog_or_tty->print_cr(" done"); 2662 gclog_or_tty->flush(); 2663 } 2664 2665 #endif // PRODUCT 2666 2667 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2668 // Note we are overriding the read-only view of the prev map here, via 2669 // the cast. 2670 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2671 } 2672 2673 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2674 _nextMarkBitMap->clearRange(mr); 2675 } 2676 2677 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2678 clearRangePrevBitmap(mr); 2679 clearRangeNextBitmap(mr); 2680 } 2681 2682 HeapRegion* 2683 ConcurrentMark::claim_region(int task_num) { 2684 // "checkpoint" the finger 2685 HeapWord* finger = _finger; 2686 2687 // _heap_end will not change underneath our feet; it only changes at 2688 // yield points. 2689 while (finger < _heap_end) { 2690 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2691 2692 // Note on how this code handles humongous regions. In the 2693 // normal case the finger will reach the start of a "starts 2694 // humongous" (SH) region. 
Its end will either be the end of the
2695     // last "continues humongous" (CH) region in the sequence, or the
2696     // standard end of the SH region (if the SH is the only region in
2697     // the sequence). That way claim_region() will skip over the CH
2698     // regions. However, there is a subtle race between a CM thread
2699     // executing this method and a mutator thread doing a humongous
2700     // object allocation. The two are not mutually exclusive as the CM
2701     // thread does not need to hold the Heap_lock when it gets
2702     // here. So there is a chance that claim_region() will come across
2703     // a free region that's in the process of becoming a SH or a CH
2704     // region. In the former case, it will either
2705     //   a) Miss the update to the region's end, in which case it will
2706     //      visit every subsequent CH region, will find their bitmaps
2707     //      empty, and do nothing, or
2708     //   b) Will observe the update of the region's end (in which case
2709     //      it will skip the subsequent CH regions).
2710     // If it comes across a region that suddenly becomes CH, the
2711     // scenario will be similar to b). So, the race between
2712     // claim_region() and a humongous object allocation might force us
2713     // to do a bit of unnecessary work (due to some unnecessary bitmap
2714     // iterations) but it should not introduce any correctness issues.
2715     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2716     HeapWord* bottom = curr_region->bottom();
2717     HeapWord* end = curr_region->end();
2718     HeapWord* limit = curr_region->next_top_at_mark_start();
2719
2720     if (verbose_low()) {
2721       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2722                              "["PTR_FORMAT", "PTR_FORMAT"), "
2723                              "limit = "PTR_FORMAT,
2724                              task_num, curr_region, bottom, end, limit);
2725     }
2726
2727     // Is the gap between reading the finger and doing the CAS too long?
2728     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2729     if (res == finger) {
2730       // we succeeded
2731
2732       // notice that _finger == end cannot be guaranteed here since
2733       // someone else might have moved the finger even further
2734       assert(_finger >= end, "the finger should have moved forward");
2735
2736       if (verbose_low()) {
2737         gclog_or_tty->print_cr("[%d] we were successful with region = "
2738                                PTR_FORMAT, task_num, curr_region);
2739       }
2740
2741       if (limit > bottom) {
2742         if (verbose_low()) {
2743           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2744                                  "returning it ", task_num, curr_region);
2745         }
2746         return curr_region;
2747       } else {
2748         assert(limit == bottom,
2749                "the region limit should be at bottom");
2750         if (verbose_low()) {
2751           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2752                                  "returning NULL", task_num, curr_region);
2753         }
2754         // we return NULL and the caller should try calling
2755         // claim_region() again.
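// [Editor's note] The claim protocol above, reduced to its essentials
// (illustrative C++11 with std::atomic standing in for Atomic::cmpxchg_ptr;
// not the HotSpot implementation):
//
//   std::atomic<HeapWord*> finger;
//   HeapWord* try_claim(HeapWord* f, HeapWord* region_end) {
//     HeapWord* expected = f;
//     // On success we own [f, region_end); on failure another thread
//     // moved the finger, and the caller re-reads it and retries.
//     return finger.compare_exchange_strong(expected, region_end)
//         ? f : NULL;
//   }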
2756 return NULL; 2757 } 2758 } else { 2759 assert(_finger > finger, "the finger should have moved forward"); 2760 if (verbose_low()) { 2761 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2762 "global finger = "PTR_FORMAT", " 2763 "our finger = "PTR_FORMAT, 2764 task_num, _finger, finger); 2765 } 2766 2767 // read it again 2768 finger = _finger; 2769 } 2770 } 2771 2772 return NULL; 2773 } 2774 2775 #ifndef PRODUCT 2776 enum VerifyNoCSetOopsPhase { 2777 VerifyNoCSetOopsStack, 2778 VerifyNoCSetOopsQueues, 2779 VerifyNoCSetOopsSATBCompleted, 2780 VerifyNoCSetOopsSATBThread 2781 }; 2782 2783 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2784 private: 2785 G1CollectedHeap* _g1h; 2786 VerifyNoCSetOopsPhase _phase; 2787 int _info; 2788 2789 const char* phase_str() { 2790 switch (_phase) { 2791 case VerifyNoCSetOopsStack: return "Stack"; 2792 case VerifyNoCSetOopsQueues: return "Queue"; 2793 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2794 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2795 default: ShouldNotReachHere(); 2796 } 2797 return NULL; 2798 } 2799 2800 void do_object_work(oop obj) { 2801 guarantee(!_g1h->obj_in_cs(obj), 2802 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2803 (void*) obj, phase_str(), _info)); 2804 } 2805 2806 public: 2807 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2808 2809 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2810 _phase = phase; 2811 _info = info; 2812 } 2813 2814 virtual void do_oop(oop* p) { 2815 oop obj = oopDesc::load_decode_heap_oop(p); 2816 do_object_work(obj); 2817 } 2818 2819 virtual void do_oop(narrowOop* p) { 2820 // We should not come across narrow oops while scanning marking 2821 // stacks and SATB buffers. 2822 ShouldNotReachHere(); 2823 } 2824 2825 virtual void do_object(oop obj) { 2826 do_object_work(obj); 2827 } 2828 }; 2829 2830 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2831 bool verify_enqueued_buffers, 2832 bool verify_thread_buffers, 2833 bool verify_fingers) { 2834 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2835 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2836 return; 2837 } 2838 2839 VerifyNoCSetOopsClosure cl; 2840 2841 if (verify_stacks) { 2842 // Verify entries on the global mark stack 2843 cl.set_phase(VerifyNoCSetOopsStack); 2844 _markStack.oops_do(&cl); 2845 2846 // Verify entries on the task queues 2847 for (int i = 0; i < (int) _max_task_num; i += 1) { 2848 cl.set_phase(VerifyNoCSetOopsQueues, i); 2849 OopTaskQueue* queue = _task_queues->queue(i); 2850 queue->oops_do(&cl); 2851 } 2852 } 2853 2854 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2855 2856 // Verify entries on the enqueued SATB buffers 2857 if (verify_enqueued_buffers) { 2858 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2859 satb_qs.iterate_completed_buffers_read_only(&cl); 2860 } 2861 2862 // Verify entries on the per-thread SATB buffers 2863 if (verify_thread_buffers) { 2864 cl.set_phase(VerifyNoCSetOopsSATBThread); 2865 satb_qs.iterate_thread_buffers_read_only(&cl); 2866 } 2867 2868 if (verify_fingers) { 2869 // Verify the global finger 2870 HeapWord* global_finger = finger(); 2871 if (global_finger != NULL && global_finger < _heap_end) { 2872 // The global finger always points to a heap region boundary. 
We 2873 // use heap_region_containing_raw() to get the containing region 2874 // given that the global finger could be pointing to a free region 2875 // which subsequently becomes continues humongous. If that 2876 // happens, heap_region_containing() will return the bottom of the 2877 // corresponding starts humongous region and the check below will 2878 // not hold any more. 2879 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2880 guarantee(global_finger == global_hr->bottom(), 2881 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2882 global_finger, HR_FORMAT_PARAMS(global_hr))); 2883 } 2884 2885 // Verify the task fingers 2886 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2887 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2888 CMTask* task = _tasks[i]; 2889 HeapWord* task_finger = task->finger(); 2890 if (task_finger != NULL && task_finger < _heap_end) { 2891 // See above note on the global finger verification. 2892 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2893 guarantee(task_finger == task_hr->bottom() || 2894 !task_hr->in_collection_set(), 2895 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2896 task_finger, HR_FORMAT_PARAMS(task_hr))); 2897 } 2898 } 2899 } 2900 } 2901 #endif // PRODUCT 2902 2903 void ConcurrentMark::clear_marking_state(bool clear_overflow) { 2904 _markStack.setEmpty(); 2905 _markStack.clear_overflow(); 2906 if (clear_overflow) { 2907 clear_has_overflown(); 2908 } else { 2909 assert(has_overflown(), "pre-condition"); 2910 } 2911 _finger = _heap_start; 2912 2913 for (int i = 0; i < (int)_max_task_num; ++i) { 2914 OopTaskQueue* queue = _task_queues->queue(i); 2915 queue->set_empty(); 2916 } 2917 } 2918 2919 // Aggregate the counting data that was constructed concurrently 2920 // with marking. 2921 class AggregateCountDataHRClosure: public HeapRegionClosure { 2922 ConcurrentMark* _cm; 2923 BitMap* _cm_card_bm; 2924 size_t _max_task_num; 2925 2926 public: 2927 AggregateCountDataHRClosure(ConcurrentMark *cm, 2928 BitMap* cm_card_bm, 2929 size_t max_task_num) : 2930 _cm(cm), _cm_card_bm(cm_card_bm), 2931 _max_task_num(max_task_num) { } 2932 2933 bool is_card_aligned(HeapWord* p) { 2934 return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0); 2935 } 2936 2937 bool doHeapRegion(HeapRegion* hr) { 2938 if (hr->continuesHumongous()) { 2939 // We will ignore these here and process them when their 2940 // associated "starts humongous" region is processed. 2941 // Note that we cannot rely on their associated 2942 // "starts humongous" region to have their bit set to 1 2943 // since, due to the region chunking in the parallel region 2944 // iteration, a "continues humongous" region might be visited 2945 // before its associated "starts humongous". 2946 return false; 2947 } 2948 2949 HeapWord* start = hr->bottom(); 2950 HeapWord* limit = hr->next_top_at_mark_start(); 2951 HeapWord* end = hr->end(); 2952 2953 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2954 err_msg("Preconditions not met - " 2955 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2956 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2957 start, limit, hr->top(), hr->end())); 2958 2959 assert(hr->next_marked_bytes() == 0, "Precondition"); 2960 2961 if (start == limit) { 2962 // NTAMS of this region has not been set so nothing to do. 
2963       return false;
2964     }
2965
2966     assert(is_card_aligned(start), "sanity");
2967     assert(is_card_aligned(end), "sanity");
2968
2969     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2970     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2971     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2972
2973     // If ntams is not card aligned then we bump the index for
2974     // limit so that we get the card spanning ntams.
2975     if (!is_card_aligned(limit)) {
2976       limit_idx += 1;
2977     }
2978
2979     assert(limit_idx <= end_idx, "or else use atomics");
2980
2981     // Aggregate the "stripe" in the count data associated with hr.
2982     uint hrs_index = hr->hrs_index();
2983     size_t marked_bytes = 0;
2984
2985     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2986       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2987       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2988
2989       // Fetch the marked_bytes in this region for task i and
2990       // add it to the running total for this region.
2991       marked_bytes += marked_bytes_array[hrs_index];
2992
2993       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2994       // into the global card bitmap.
2995       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2996
2997       while (scan_idx < limit_idx) {
2998         assert(task_card_bm->at(scan_idx) == true, "should be");
2999         _cm_card_bm->set_bit(scan_idx);
3000         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3001
3002         // BitMap::get_next_one_offset() can handle the case when
3003         // its left_offset parameter is greater than its right_offset
3004         // parameter. It does, however, have an early exit if
3005         // left_offset == right_offset. So let's limit the value
3006         // passed in for left offset here.
3007         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3008         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3009       }
3010     }
3011
3012     // Update the marked bytes for this region.
3013     hr->add_to_marked_bytes(marked_bytes);
3014
3015     // Next heap region
3016     return false;
3017   }
3018 };
3019
3020 class G1AggregateCountDataTask: public AbstractGangTask {
3021 protected:
3022   G1CollectedHeap* _g1h;
3023   ConcurrentMark* _cm;
3024   BitMap* _cm_card_bm;
3025   size_t _max_task_num;
3026   int _active_workers;
3027
3028 public:
3029   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3030                            ConcurrentMark* cm,
3031                            BitMap* cm_card_bm,
3032                            size_t max_task_num,
3033                            int n_workers) :
3034     AbstractGangTask("Count Aggregation"),
3035     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3036     _max_task_num(max_task_num),
3037     _active_workers(n_workers) { }
3038
3039   void work(uint worker_id) {
3040     AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
3041
3042     if (G1CollectedHeap::use_parallel_gc_threads()) {
3043       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3044                                             _active_workers,
3045                                             HeapRegion::AggregateCountClaimValue);
3046     } else {
3047       _g1h->heap_region_iterate(&cl);
3048     }
3049   }
3050 };
3051
3052
3053 void ConcurrentMark::aggregate_count_data() {
3054   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3055                    _g1h->workers()->active_workers() :
3056                    1);
3057
3058   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3059                                            _max_task_num, n_workers);
3060
3061   if (G1CollectedHeap::use_parallel_gc_threads()) {
3062     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3063            "sanity check");
3064     _g1h->set_par_threads(n_workers);
3065     _g1h->workers()->run_task(&g1_par_agg_task);
3066     _g1h->set_par_threads(0);
3067
3068     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3069            "sanity check");
3070     _g1h->reset_heap_region_claim_values();
3071   } else {
3072     g1_par_agg_task.work(0);
3073   }
3074 }
3075
3076 // Clear the per-worker arrays used to store the per-region counting data
3077 void ConcurrentMark::clear_all_count_data() {
3078   // Clear the global card bitmap - it will be filled during
3079   // liveness count aggregation (during remark) and the
3080   // final counting task.
3081   _card_bm.clear();
3082
3083   // Clear the global region bitmap - it will be filled as part
3084   // of the final counting task.
3085   _region_bm.clear();
3086
3087   uint max_regions = _g1h->max_regions();
3088   assert(_max_task_num != 0, "uninitialized");
3089
3090   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3091     BitMap* task_card_bm = count_card_bitmap_for(i);
3092     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3093
3094     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3095     assert(marked_bytes_array != NULL, "uninitialized");
3096
3097     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3098     task_card_bm->clear();
3099   }
3100 }
3101
3102 void ConcurrentMark::print_stats() {
3103   if (verbose_stats()) {
3104     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3105     for (size_t i = 0; i < _active_tasks; ++i) {
3106       _tasks[i]->print_stats();
3107       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3108     }
3109   }
3110 }
3111
3112 // abandon current marking iteration due to a Full GC
3113 void ConcurrentMark::abort() {
3114   // Clear all marks to force marking thread to do nothing
3115   _nextMarkBitMap->clearAll();
3116   // Clear the liveness counting data
3117   clear_all_count_data();
3118   // Empty mark stack
3119   clear_marking_state();
3120   for (int i = 0; i < (int)_max_task_num; ++i) {
3121     _tasks[i]->clear_region_fields();
3122   }
3123   _has_aborted = true;
3124
3125   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3126   satb_mq_set.abandon_partial_marking();
3127   // This can be called either during or outside marking, we'll read
3128   // the expected_active value from the SATB queue set.
3129   satb_mq_set.set_active_all_threads(
3130                                  false, /* new active value */
3131                                  satb_mq_set.is_active() /* expected_active */);
3132 }
3133
3134 static void print_ms_time_info(const char* prefix, const char* name,
3135                                NumberSeq& ns) {
3136   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3137                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3138   if (ns.num() > 0) {
3139     gclog_or_tty->print_cr("%s [std.
dev = %8.2f ms, max = %8.2f ms]", 3140 prefix, ns.sd(), ns.maximum()); 3141 } 3142 } 3143 3144 void ConcurrentMark::print_summary_info() { 3145 gclog_or_tty->print_cr(" Concurrent marking:"); 3146 print_ms_time_info(" ", "init marks", _init_times); 3147 print_ms_time_info(" ", "remarks", _remark_times); 3148 { 3149 print_ms_time_info(" ", "final marks", _remark_mark_times); 3150 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3151 3152 } 3153 print_ms_time_info(" ", "cleanups", _cleanup_times); 3154 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3155 _total_counting_time, 3156 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3157 (double)_cleanup_times.num() 3158 : 0.0)); 3159 if (G1ScrubRemSets) { 3160 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3161 _total_rs_scrub_time, 3162 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3163 (double)_cleanup_times.num() 3164 : 0.0)); 3165 } 3166 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3167 (_init_times.sum() + _remark_times.sum() + 3168 _cleanup_times.sum())/1000.0); 3169 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3170 "(%8.2f s marking).", 3171 cmThread()->vtime_accum(), 3172 cmThread()->vtime_mark_accum()); 3173 } 3174 3175 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3176 _parallel_workers->print_worker_threads_on(st); 3177 } 3178 3179 // We take a break if someone is trying to stop the world. 3180 bool ConcurrentMark::do_yield_check(uint worker_id) { 3181 if (should_yield()) { 3182 if (worker_id == 0) { 3183 _g1h->g1_policy()->record_concurrent_pause(); 3184 } 3185 cmThread()->yield(); 3186 return true; 3187 } else { 3188 return false; 3189 } 3190 } 3191 3192 bool ConcurrentMark::should_yield() { 3193 return cmThread()->should_yield(); 3194 } 3195 3196 bool ConcurrentMark::containing_card_is_marked(void* p) { 3197 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3198 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3199 } 3200 3201 bool ConcurrentMark::containing_cards_are_marked(void* start, 3202 void* last) { 3203 return containing_card_is_marked(start) && 3204 containing_card_is_marked(last); 3205 } 3206 3207 #ifndef PRODUCT 3208 // for debugging purposes 3209 void ConcurrentMark::print_finger() { 3210 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3211 _heap_start, _heap_end, _finger); 3212 for (int i = 0; i < (int) _max_task_num; ++i) { 3213 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3214 } 3215 gclog_or_tty->print_cr(""); 3216 } 3217 #endif 3218 3219 void CMTask::scan_object(oop obj) { 3220 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3221 3222 if (_cm->verbose_high()) { 3223 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3224 _task_id, (void*) obj); 3225 } 3226 3227 size_t obj_size = obj->size(); 3228 _words_scanned += obj_size; 3229 3230 obj->oop_iterate(_cm_oop_closure); 3231 statsOnly( ++_objs_scanned ); 3232 check_limits(); 3233 } 3234 3235 // Closure for iteration over bitmaps 3236 class CMBitMapClosure : public BitMapClosure { 3237 private: 3238 // the bitmap that is being iterated over 3239 CMBitMap* _nextMarkBitMap; 3240 ConcurrentMark* _cm; 3241 CMTask* _task; 3242 3243 public: 3244 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3245 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3246 3247 bool 
do_bit(size_t offset) { 3248 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3249 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3250 assert( addr < _cm->finger(), "invariant"); 3251 3252 statsOnly( _task->increase_objs_found_on_bitmap() ); 3253 assert(addr >= _task->finger(), "invariant"); 3254 3255 // We move the task's local finger along. 3256 _task->move_finger_to(addr); 3257 3258 _task->scan_object(oop(addr)); 3259 // we only partially drain the local queue and global stack 3260 _task->drain_local_queue(true); 3261 _task->drain_global_stack(true); 3262 3263 // if the has_aborted flag has been raised, we need to bail out of 3264 // the iteration 3265 return !_task->has_aborted(); 3266 } 3267 }; 3268 3269 // Closure for iterating over objects, currently only used for 3270 // processing SATB buffers. 3271 class CMObjectClosure : public ObjectClosure { 3272 private: 3273 CMTask* _task; 3274 3275 public: 3276 void do_object(oop obj) { 3277 _task->deal_with_reference(obj); 3278 } 3279 3280 CMObjectClosure(CMTask* task) : _task(task) { } 3281 }; 3282 3283 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3284 ConcurrentMark* cm, 3285 CMTask* task) 3286 : _g1h(g1h), _cm(cm), _task(task) { 3287 assert(_ref_processor == NULL, "should be initialized to NULL"); 3288 3289 if (G1UseConcMarkReferenceProcessing) { 3290 _ref_processor = g1h->ref_processor_cm(); 3291 assert(_ref_processor != NULL, "should not be NULL"); 3292 } 3293 } 3294 3295 void CMTask::setup_for_region(HeapRegion* hr) { 3296 // Separated the asserts so that we know which one fires. 3297 assert(hr != NULL, 3298 "claim_region() should have filtered out NULL regions"); 3299 assert(!hr->continuesHumongous(), 3300 "claim_region() should have filtered out continues humongous regions"); 3301 3302 if (_cm->verbose_low()) { 3303 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 3304 _task_id, hr); 3305 } 3306 3307 _curr_region = hr; 3308 _finger = hr->bottom(); 3309 update_region_limit(); 3310 } 3311 3312 void CMTask::update_region_limit() { 3313 HeapRegion* hr = _curr_region; 3314 HeapWord* bottom = hr->bottom(); 3315 HeapWord* limit = hr->next_top_at_mark_start(); 3316 3317 if (limit == bottom) { 3318 if (_cm->verbose_low()) { 3319 gclog_or_tty->print_cr("[%d] found an empty region " 3320 "["PTR_FORMAT", "PTR_FORMAT")", 3321 _task_id, bottom, limit); 3322 } 3323 // The region was collected underneath our feet. 3324 // We set the finger to bottom to ensure that the bitmap 3325 // iteration that will follow this will not do anything. 3326 // (this is not a condition that holds when we set the region up, 3327 // as the region is not supposed to be empty in the first place) 3328 _finger = bottom; 3329 } else if (limit >= _region_limit) { 3330 assert(limit >= _finger, "peace of mind"); 3331 } else { 3332 assert(limit < _region_limit, "only way to get here"); 3333 // This can happen under some pretty unusual circumstances. An 3334 // evacuation pause empties the region underneath our feet (NTAMS 3335 // at bottom). We then do some allocation in the region (NTAMS 3336 // stays at bottom), followed by the region being used as a GC 3337 // alloc region (NTAMS will move to top() and the objects 3338 // originally below it will be grayed). All objects now marked in 3339 // the region are explicitly grayed, if below the global finger, 3340 // and in fact we do not need to scan anything else. 
So, we simply 3341 // set _finger to be limit to ensure that the bitmap iteration 3342 // doesn't do anything. 3343 _finger = limit; 3344 } 3345 3346 _region_limit = limit; 3347 } 3348 3349 void CMTask::giveup_current_region() { 3350 assert(_curr_region != NULL, "invariant"); 3351 if (_cm->verbose_low()) { 3352 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 3353 _task_id, _curr_region); 3354 } 3355 clear_region_fields(); 3356 } 3357 3358 void CMTask::clear_region_fields() { 3359 // Values for these three fields that indicate that we're not 3360 // holding on to a region. 3361 _curr_region = NULL; 3362 _finger = NULL; 3363 _region_limit = NULL; 3364 } 3365 3366 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3367 if (cm_oop_closure == NULL) { 3368 assert(_cm_oop_closure != NULL, "invariant"); 3369 } else { 3370 assert(_cm_oop_closure == NULL, "invariant"); 3371 } 3372 _cm_oop_closure = cm_oop_closure; 3373 } 3374 3375 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3376 guarantee(nextMarkBitMap != NULL, "invariant"); 3377 3378 if (_cm->verbose_low()) { 3379 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3380 } 3381 3382 _nextMarkBitMap = nextMarkBitMap; 3383 clear_region_fields(); 3384 3385 _calls = 0; 3386 _elapsed_time_ms = 0.0; 3387 _termination_time_ms = 0.0; 3388 _termination_start_time_ms = 0.0; 3389 3390 #if _MARKING_STATS_ 3391 _local_pushes = 0; 3392 _local_pops = 0; 3393 _local_max_size = 0; 3394 _objs_scanned = 0; 3395 _global_pushes = 0; 3396 _global_pops = 0; 3397 _global_max_size = 0; 3398 _global_transfers_to = 0; 3399 _global_transfers_from = 0; 3400 _regions_claimed = 0; 3401 _objs_found_on_bitmap = 0; 3402 _satb_buffers_processed = 0; 3403 _steal_attempts = 0; 3404 _steals = 0; 3405 _aborted = 0; 3406 _aborted_overflow = 0; 3407 _aborted_cm_aborted = 0; 3408 _aborted_yield = 0; 3409 _aborted_timed_out = 0; 3410 _aborted_satb = 0; 3411 _aborted_termination = 0; 3412 #endif // _MARKING_STATS_ 3413 } 3414 3415 bool CMTask::should_exit_termination() { 3416 regular_clock_call(); 3417 // This is called when we are in the termination protocol. We should 3418 // quit if, for some reason, this task wants to abort or the global 3419 // stack is not empty (this means that we can get work from it). 3420 return !_cm->mark_stack_empty() || has_aborted(); 3421 } 3422 3423 void CMTask::reached_limit() { 3424 assert(_words_scanned >= _words_scanned_limit || 3425 _refs_reached >= _refs_reached_limit , 3426 "shouldn't have been called otherwise"); 3427 regular_clock_call(); 3428 } 3429 3430 void CMTask::regular_clock_call() { 3431 if (has_aborted()) return; 3432 3433 // First, we need to recalculate the words scanned and refs reached 3434 // limits for the next clock call. 3435 recalculate_limits(); 3436 3437 // During the regular clock call we do the following 3438 3439 // (1) If an overflow has been flagged, then we abort. 3440 if (_cm->has_overflown()) { 3441 set_has_aborted(); 3442 return; 3443 } 3444 3445 // If we are not concurrent (i.e. we're doing remark) we don't need 3446 // to check anything else. The other steps are only needed during 3447 // the concurrent marking phase. 3448 if (!concurrent()) return; 3449 3450 // (2) If marking has been aborted for Full GC, then we also abort. 3451 if (_cm->has_aborted()) { 3452 set_has_aborted(); 3453 statsOnly( ++_aborted_cm_aborted ); 3454 return; 3455 } 3456 3457 double curr_time_ms = os::elapsedVTime() * 1000.0; 3458 3459 // (3) If marking stats are enabled, then we update the step history. 
3460 #if _MARKING_STATS_ 3461 if (_words_scanned >= _words_scanned_limit) { 3462 ++_clock_due_to_scanning; 3463 } 3464 if (_refs_reached >= _refs_reached_limit) { 3465 ++_clock_due_to_marking; 3466 } 3467 3468 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 3469 _interval_start_time_ms = curr_time_ms; 3470 _all_clock_intervals_ms.add(last_interval_ms); 3471 3472 if (_cm->verbose_medium()) { 3473 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 3474 "scanned = %d%s, refs reached = %d%s", 3475 _task_id, last_interval_ms, 3476 _words_scanned, 3477 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 3478 _refs_reached, 3479 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 3480 } 3481 #endif // _MARKING_STATS_ 3482 3483 // (4) We check whether we should yield. If we have to, then we abort. 3484 if (_cm->should_yield()) { 3485 // We should yield. To do this we abort the task. The caller is 3486 // responsible for yielding. 3487 set_has_aborted(); 3488 statsOnly( ++_aborted_yield ); 3489 return; 3490 } 3491 3492 // (5) We check whether we've reached our time quota. If we have, 3493 // then we abort. 3494 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3495 if (elapsed_time_ms > _time_target_ms) { 3496 set_has_aborted(); 3497 _has_timed_out = true; 3498 statsOnly( ++_aborted_timed_out ); 3499 return; 3500 } 3501 3502 // (6) Finally, we check whether there are enough completed SATB 3503 // buffers available for processing. If there are, we abort. 3504 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3505 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3506 if (_cm->verbose_low()) { 3507 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 3508 _task_id); 3509 } 3510 // we do need to process SATB buffers, so we'll abort and restart 3511 // the marking task to do so 3512 set_has_aborted(); 3513 statsOnly( ++_aborted_satb ); 3514 return; 3515 } 3516 } 3517 3518 void CMTask::recalculate_limits() { 3519 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3520 _words_scanned_limit = _real_words_scanned_limit; 3521 3522 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3523 _refs_reached_limit = _real_refs_reached_limit; 3524 } 3525 3526 void CMTask::decrease_limits() { 3527 // This is called when we believe that we're going to do an infrequent 3528 // operation which will increase the per-byte scanned cost (i.e. move 3529 // entries to/from the global stack). It basically tries to decrease the 3530 // scanning limit so that the clock is called earlier. 
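// Illustrative arithmetic (no additional logic implied): recalculate_limits() // sets each limit to the current count plus one full period P, so pulling the // limit back by 3P/4 below leaves roughly P/4 of the budget before // reached_limit() fires and regular_clock_call() is invoked again. 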
3531 3532 if (_cm->verbose_medium()) { 3533 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 3534 } 3535 3536 _words_scanned_limit = _real_words_scanned_limit - 3537 3 * words_scanned_period / 4; 3538 _refs_reached_limit = _real_refs_reached_limit - 3539 3 * refs_reached_period / 4; 3540 } 3541 3542 void CMTask::move_entries_to_global_stack() { 3543 // local array where we'll store the entries that will be popped 3544 // from the local queue 3545 oop buffer[global_stack_transfer_size]; 3546 3547 int n = 0; 3548 oop obj; 3549 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3550 buffer[n] = obj; 3551 ++n; 3552 } 3553 3554 if (n > 0) { 3555 // we popped at least one entry from the local queue 3556 3557 statsOnly( ++_global_transfers_to; _local_pops += n ); 3558 3559 if (!_cm->mark_stack_push(buffer, n)) { 3560 if (_cm->verbose_low()) { 3561 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", 3562 _task_id); 3563 } 3564 set_has_aborted(); 3565 } else { 3566 // the transfer was successful 3567 3568 if (_cm->verbose_medium()) { 3569 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 3570 _task_id, n); 3571 } 3572 statsOnly( int tmp_size = _cm->mark_stack_size(); 3573 if (tmp_size > _global_max_size) { 3574 _global_max_size = tmp_size; 3575 } 3576 _global_pushes += n ); 3577 } 3578 } 3579 3580 // this operation was quite expensive, so decrease the limits 3581 decrease_limits(); 3582 } 3583 3584 void CMTask::get_entries_from_global_stack() { 3585 // local array where we'll store the entries that will be popped 3586 // from the global stack. 3587 oop buffer[global_stack_transfer_size]; 3588 int n; 3589 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3590 assert(n <= global_stack_transfer_size, 3591 "we should not pop more than the given limit"); 3592 if (n > 0) { 3593 // yes, we did actually pop at least one entry 3594 3595 statsOnly( ++_global_transfers_from; _global_pops += n ); 3596 if (_cm->verbose_medium()) { 3597 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 3598 _task_id, n); 3599 } 3600 for (int i = 0; i < n; ++i) { 3601 bool success = _task_queue->push(buffer[i]); 3602 // We only call this when the local queue is empty or under a 3603 // given target limit. So, we do not expect this push to fail. 3604 assert(success, "invariant"); 3605 } 3606 3607 statsOnly( int tmp_size = _task_queue->size(); 3608 if (tmp_size > _local_max_size) { 3609 _local_max_size = tmp_size; 3610 } 3611 _local_pushes += n ); 3612 } 3613 3614 // this operation was quite expensive, so decrease the limits 3615 decrease_limits(); 3616 } 3617 3618 void CMTask::drain_local_queue(bool partially) { 3619 if (has_aborted()) return; 3620 3621 // Decide what the target size is, depending on whether we're going to 3622 // drain it partially (so that other tasks can steal if they run out 3623 // of things to do) or totally (at the very end). 
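// For example (values purely illustrative, not the defaults): with // max_elems() == 16384 and GCDrainStackTargetSize == 64, a partial drain // stops once the queue is down to MIN2(16384 / 3, 64) == 64 entries, // leaving entries available for stealers, while a total drain (target 0) // empties the queue completely. 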
3624 size_t target_size; 3625 if (partially) { 3626 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3627 } else { 3628 target_size = 0; 3629 } 3630 3631 if (_task_queue->size() > target_size) { 3632 if (_cm->verbose_high()) { 3633 gclog_or_tty->print_cr("[%d] draining local queue, target size = "SIZE_FORMAT, 3634 _task_id, target_size); 3635 } 3636 3637 oop obj; 3638 bool ret = _task_queue->pop_local(obj); 3639 while (ret) { 3640 statsOnly( ++_local_pops ); 3641 3642 if (_cm->verbose_high()) { 3643 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 3644 (void*) obj); 3645 } 3646 3647 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3648 assert(!_g1h->is_on_master_free_list( 3649 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3650 3651 scan_object(obj); 3652 3653 if (_task_queue->size() <= target_size || has_aborted()) { 3654 ret = false; 3655 } else { 3656 ret = _task_queue->pop_local(obj); 3657 } 3658 } 3659 3660 if (_cm->verbose_high()) { 3661 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 3662 _task_id, _task_queue->size()); 3663 } 3664 } 3665 } 3666 3667 void CMTask::drain_global_stack(bool partially) { 3668 if (has_aborted()) return; 3669 3670 // We have a policy to drain the local queue before we attempt to 3671 // drain the global stack. 3672 assert(partially || _task_queue->size() == 0, "invariant"); 3673 3674 // Decide what the target size is, depending on whether we're going to 3675 // drain it partially (so that other tasks can steal if they run out 3676 // of things to do) or totally (at the very end). Notice that, 3677 // because we move entries from the global stack in chunks or 3678 // because another task might be doing the same, we might in fact 3679 // drop below the target. But this is not a problem. 3680 size_t target_size; 3681 if (partially) { 3682 target_size = _cm->partial_mark_stack_size_target(); 3683 } else { 3684 target_size = 0; 3685 } 3686 3687 if (_cm->mark_stack_size() > target_size) { 3688 if (_cm->verbose_low()) { 3689 gclog_or_tty->print_cr("[%d] draining global_stack, target size "SIZE_FORMAT, 3690 _task_id, target_size); 3691 } 3692 3693 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3694 get_entries_from_global_stack(); 3695 drain_local_queue(partially); 3696 } 3697 3698 if (_cm->verbose_low()) { 3699 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 3700 _task_id, _cm->mark_stack_size()); 3701 } 3702 } 3703 } 3704 3705 // The SATB queue has several assumptions about whether to call the par or 3706 // non-par versions of the methods. This is why some of the code is 3707 // replicated. We should really get rid of the single-threaded version 3708 // of the code to simplify things. 3709 void CMTask::drain_satb_buffers() { 3710 if (has_aborted()) return; 3711 3712 // We set this so that the regular clock knows that we're in the 3713 // middle of draining buffers and doesn't set the abort flag when it 3714 // notices that SATB buffers are available for draining. It'd be 3715 // very counterproductive if it did that. :-) 3716 _draining_satb_buffers = true; 3717 3718 CMObjectClosure oc(this); 3719 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3720 if (G1CollectedHeap::use_parallel_gc_threads()) { 3721 satb_mq_set.set_par_closure(_task_id, &oc); 3722 } else { 3723 satb_mq_set.set_closure(&oc); 3724 } 3725 3726 // This keeps claiming and applying the closure to completed buffers 3727 // until we run out of buffers or we need to abort. 
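// (The two loops below are deliberately parallel in structure: each claims // a completed buffer, applies the closure to it, bumps the stats and calls // the regular clock. Only the buffer-claiming call differs (par vs. // non-par), which is the replication noted above.) 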
3728 if (G1CollectedHeap::use_parallel_gc_threads()) { 3729 while (!has_aborted() && 3730 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3731 if (_cm->verbose_medium()) { 3732 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3733 } 3734 statsOnly( ++_satb_buffers_processed ); 3735 regular_clock_call(); 3736 } 3737 } else { 3738 while (!has_aborted() && 3739 satb_mq_set.apply_closure_to_completed_buffer()) { 3740 if (_cm->verbose_medium()) { 3741 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3742 } 3743 statsOnly( ++_satb_buffers_processed ); 3744 regular_clock_call(); 3745 } 3746 } 3747 3748 if (!concurrent() && !has_aborted()) { 3749 // We should only do this during remark. 3750 if (G1CollectedHeap::use_parallel_gc_threads()) { 3751 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3752 } else { 3753 satb_mq_set.iterate_closure_all_threads(); 3754 } 3755 } 3756 3757 _draining_satb_buffers = false; 3758 3759 assert(has_aborted() || 3760 concurrent() || 3761 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3762 3763 if (G1CollectedHeap::use_parallel_gc_threads()) { 3764 satb_mq_set.set_par_closure(_task_id, NULL); 3765 } else { 3766 satb_mq_set.set_closure(NULL); 3767 } 3768 3769 // again, this was a potentially expensive operation, so decrease the 3770 // limits to get the regular clock call early 3771 decrease_limits(); 3772 } 3773 3774 void CMTask::print_stats() { 3775 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 3776 _task_id, _calls); 3777 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3778 _elapsed_time_ms, _termination_time_ms); 3779 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3780 _step_times_ms.num(), _step_times_ms.avg(), 3781 _step_times_ms.sd()); 3782 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3783 _step_times_ms.maximum(), _step_times_ms.sum()); 3784 3785 #if _MARKING_STATS_ 3786 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3787 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3788 _all_clock_intervals_ms.sd()); 3789 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3790 _all_clock_intervals_ms.maximum(), 3791 _all_clock_intervals_ms.sum()); 3792 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3793 _clock_due_to_scanning, _clock_due_to_marking); 3794 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3795 _objs_scanned, _objs_found_on_bitmap); 3796 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3797 _local_pushes, _local_pops, _local_max_size); 3798 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3799 _global_pushes, _global_pops, _global_max_size); 3800 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3801 _global_transfers_to, _global_transfers_from); 3802 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3803 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3804 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3805 _steal_attempts, _steals); 3806 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3807 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3808 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3809 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3810 _aborted_timed_out, _aborted_satb, 
_aborted_termination); 3811 #endif // _MARKING_STATS_ 3812 } 3813 3814 /***************************************************************************** 3815 3816 The do_marking_step(time_target_ms) method is the building block 3817 of the parallel marking framework. It can be called in parallel 3818 with other invocations of do_marking_step() on different tasks 3819 (but only one per task, obviously) and concurrently with the 3820 mutator threads, or during remark; hence it eliminates the need 3821 for two versions of the code. When called during remark, it will 3822 pick up from where the task left off during the concurrent marking 3823 phase. Interestingly, tasks are also claimable during evacuation 3824 pauses, since do_marking_step() ensures that it aborts before 3825 it needs to yield. 3826 3827 The data structures that it uses to do marking work are the 3828 following: 3829 3830 (1) Marking Bitmap. If there are gray objects that appear only 3831 on the bitmap (this happens either when dealing with an overflow 3832 or when the initial marking phase has simply marked the roots 3833 and didn't push them on the stack), then tasks claim heap 3834 regions whose bitmap they then scan to find gray objects. A 3835 global finger indicates where the end of the last claimed region 3836 is. A local finger indicates how far into the region a task has 3837 scanned. The two fingers are used to determine how to gray an 3838 object (i.e. whether simply marking it is OK, as it will be 3839 visited by a task in the future, or whether it also needs to be 3840 pushed on a stack). 3841 3842 (2) Local Queue. The local queue of the task, which the task 3843 accesses reasonably efficiently. Other tasks can steal from 3844 it when they run out of work. Throughout the marking phase, a 3845 task attempts to keep its local queue short but not totally 3846 empty, so that entries are available for stealing by other 3847 tasks. Only when there is no more work will a task totally 3848 drain its local queue. 3849 3850 (3) Global Mark Stack. This handles local queue overflow. During 3851 marking only sets of entries are moved between it and the local 3852 queues, as access to it requires a mutex, and finer-grained 3853 interaction with it might cause contention. If it 3854 overflows, then the marking phase should restart and iterate 3855 over the bitmap to identify gray objects. Throughout the marking 3856 phase, tasks attempt to keep the global mark stack at a small 3857 length but not totally empty, so that entries are available for 3858 popping by other tasks. Only when there is no more work will tasks 3859 totally drain the global mark stack. 3860 3861 (4) SATB Buffer Queue. This is where completed SATB buffers are 3862 made available. Buffers are regularly removed from this queue 3863 and scanned for roots, so that the queue doesn't get too 3864 long. During remark, all completed buffers are processed, as 3865 well as the filled-in parts of any uncompleted buffers. 3866 3867 The do_marking_step() method tries to abort when the time target 3868 has been reached. There are a few other cases when the 3869 do_marking_step() method also aborts: 3870 3871 (1) When the marking phase has been aborted (after a Full GC). 3872 3873 (2) When a global overflow (on the global stack) has been 3874 triggered. Before the task aborts, it will actually sync up with 3875 the other tasks to ensure that all the marking data structures 3876 (local queues, stacks, fingers etc.) 
are re-initialised so that 3877 when do_marking_step() completes, the marking phase can 3878 immediately restart. 3879 3880 (3) When enough completed SATB buffers are available. The 3881 do_marking_step() method only tries to drain SATB buffers right 3882 at the beginning. So, if enough buffers are available, the 3883 marking step aborts and the SATB buffers are processed at 3884 the beginning of the next invocation. 3885 3886 (4) To yield. When we have to yield, we abort and do the yield 3887 right at the end of do_marking_step(). This saves us a lot 3888 of hassle as, by yielding, we might allow a Full GC. If this 3889 happens, objects will be compacted underneath our feet, the 3890 heap might shrink, etc. We avoid checking for this by just 3891 aborting and doing the yield right at the end. 3892 3893 From the above it follows that the do_marking_step() method should 3894 be called in a loop (or, otherwise, regularly) until it completes. 3895 3896 If a marking step completes without its has_aborted() flag being 3897 true, it means it has completed the current marking phase (and 3898 also all other marking tasks have done so and have all synced up). 3899 3900 A method called regular_clock_call() is invoked "regularly" (in 3901 sub-ms intervals) throughout marking. It is this clock method that 3902 checks all the abort conditions which were mentioned above and 3903 decides when the task should abort. A work-based scheme is used to 3904 trigger this clock method: when the number of object words the 3905 marking phase has scanned or the number of references the marking 3906 phase has visited reaches a given limit. Additional invocations of 3907 the clock method have been planted in a few other strategic places 3908 too. The initial reason for the clock method was to avoid calling 3909 vtime too regularly, as it is quite expensive. So, once it was in 3910 place, it was natural to piggy-back all the other conditions on it 3911 too and not constantly check them throughout the code. 3912 3913 *****************************************************************************/ 3914 3915 void CMTask::do_marking_step(double time_target_ms, 3916 bool do_stealing, 3917 bool do_termination) { 3918 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 3919 assert(concurrent() == _cm->concurrent(), "they should be the same"); 3920 3921 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 3922 assert(_task_queues != NULL, "invariant"); 3923 assert(_task_queue != NULL, "invariant"); 3924 assert(_task_queues->queue(_task_id) == _task_queue, "invariant"); 3925 3926 assert(!_claimed, 3927 "only one thread should claim this task at any one time"); 3928 3929 // OK, this doesn't safeguard against all possible scenarios, as it is 3930 // possible for two threads to set the _claimed flag at the same 3931 // time. But it is only for debugging purposes anyway and it will 3932 // catch most problems. 
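// (The race is possible because the flag is checked and then set with // plain loads and stores rather than an atomic compare-and-swap, so two // threads can both observe _claimed == false before either sets it.) 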
3933 _claimed = true; 3934 3935 _start_time_ms = os::elapsedVTime() * 1000.0; 3936 statsOnly( _interval_start_time_ms = _start_time_ms ); 3937 3938 double diff_prediction_ms = 3939 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 3940 _time_target_ms = time_target_ms - diff_prediction_ms; 3941 3942 // set up the variables that are used in the work-based scheme to 3943 // call the regular clock method 3944 _words_scanned = 0; 3945 _refs_reached = 0; 3946 recalculate_limits(); 3947 3948 // clear all flags 3949 clear_has_aborted(); 3950 _has_timed_out = false; 3951 _draining_satb_buffers = false; 3952 3953 ++_calls; 3954 3955 if (_cm->verbose_low()) { 3956 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 3957 "target = %1.2lfms >>>>>>>>>>", 3958 _task_id, _calls, _time_target_ms); 3959 } 3960 3961 // Set up the bitmap and oop closures. Anything that uses them is 3962 // eventually called from this method, so it is OK to allocate these 3963 // statically. 3964 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 3965 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 3966 set_cm_oop_closure(&cm_oop_closure); 3967 3968 if (_cm->has_overflown()) { 3969 // This can happen if the mark stack overflows during a GC pause 3970 // and this task, after a yield point, restarts. We have to abort 3971 // as we need to get into the overflow protocol which happens 3972 // right at the end of this task. 3973 set_has_aborted(); 3974 } 3975 3976 // First drain any available SATB buffers. After this, we will not 3977 // look at SATB buffers before the next invocation of this method. 3978 // If enough completed SATB buffers are queued up, the regular clock 3979 // will abort this task so that it restarts. 3980 drain_satb_buffers(); 3981 // ...then partially drain the local queue and the global stack 3982 drain_local_queue(true); 3983 drain_global_stack(true); 3984 3985 do { 3986 if (!has_aborted() && _curr_region != NULL) { 3987 // This means that we're already holding on to a region. 3988 assert(_finger != NULL, "if region is not NULL, then the finger " 3989 "should not be NULL either"); 3990 3991 // We might have restarted this task after an evacuation pause 3992 // which might have evacuated the region we're holding on to 3993 // underneath our feet. Let's read its limit again to make sure 3994 // that we do not iterate over a region of the heap that 3995 // contains garbage (update_region_limit() will also move 3996 // _finger to the start of the region if it is found empty). 3997 update_region_limit(); 3998 // We will start from _finger not from the start of the region, 3999 // as we might be restarting this task after aborting half-way 4000 // through scanning this region. In this case, _finger points to 4001 // the address where we last found a marked object. If this is a 4002 // fresh region, _finger points to start(). 4003 MemRegion mr = MemRegion(_finger, _region_limit); 4004 4005 if (_cm->verbose_low()) { 4006 gclog_or_tty->print_cr("[%d] we're scanning part " 4007 "["PTR_FORMAT", "PTR_FORMAT") " 4008 "of region "PTR_FORMAT, 4009 _task_id, _finger, _region_limit, _curr_region); 4010 } 4011 4012 // Let's iterate over the bitmap of the part of the 4013 // region that is left. 4014 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4015 // We successfully completed iterating over the region. Now, 4016 // let's give up the region. 
4017 giveup_current_region(); 4018 regular_clock_call(); 4019 } else { 4020 assert(has_aborted(), "currently the only way to do so"); 4021 // The only way to abort the bitmap iteration is to return 4022 // false from the do_bit() method. However, inside the 4023 // do_bit() method we move the _finger to point to the 4024 // object currently being looked at. So, if we bail out, we 4025 // have definitely set _finger to something non-null. 4026 assert(_finger != NULL, "invariant"); 4027 4028 // Region iteration was actually aborted. So now _finger 4029 // points to the address of the object we last scanned. If we 4030 // leave it there, when we restart this task, we will rescan 4031 // the object. It is easy to avoid this. We move the finger by 4032 // enough to point to the next possible object header (the 4033 // bitmap knows by how much we need to move it as it knows its 4034 // granularity). 4035 assert(_finger < _region_limit, "invariant"); 4036 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4037 // Check if bitmap iteration was aborted while scanning the last object 4038 if (new_finger >= _region_limit) { 4039 giveup_current_region(); 4040 } else { 4041 move_finger_to(new_finger); 4042 } 4043 } 4044 } 4045 // At this point we have either completed iterating over the 4046 // region we were holding on to, or we have aborted. 4047 4048 // We then partially drain the local queue and the global stack. 4049 // (Do we really need this?) 4050 drain_local_queue(true); 4051 drain_global_stack(true); 4052 4053 // Read the note on the claim_region() method on why it might 4054 // return NULL with potentially more regions available for 4055 // claiming and why we have to check out_of_regions() to determine 4056 // whether we're done or not. 4057 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4058 // We are going to try to claim a new region. We should have 4059 // given up on the previous one. 4060 // Separated the asserts so that we know which one fires. 4061 assert(_curr_region == NULL, "invariant"); 4062 assert(_finger == NULL, "invariant"); 4063 assert(_region_limit == NULL, "invariant"); 4064 if (_cm->verbose_low()) { 4065 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 4066 } 4067 HeapRegion* claimed_region = _cm->claim_region(_task_id); 4068 if (claimed_region != NULL) { 4069 // Yes, we managed to claim one 4070 statsOnly( ++_regions_claimed ); 4071 4072 if (_cm->verbose_low()) { 4073 gclog_or_tty->print_cr("[%d] we successfully claimed " 4074 "region "PTR_FORMAT, 4075 _task_id, claimed_region); 4076 } 4077 4078 setup_for_region(claimed_region); 4079 assert(_curr_region == claimed_region, "invariant"); 4080 } 4081 // It is important to call the regular clock here. It might take 4082 // a while to claim a region if, for example, we hit a large 4083 // block of empty regions. So we need to call the regular clock 4084 // method once round the loop to make sure it's called 4085 // frequently enough. 4086 regular_clock_call(); 4087 } 4088 4089 if (!has_aborted() && _curr_region == NULL) { 4090 assert(_cm->out_of_regions(), 4091 "at this point we should be out of regions"); 4092 } 4093 } while ( _curr_region != NULL && !has_aborted()); 4094 4095 if (!has_aborted()) { 4096 // We cannot check whether the global stack is empty, since other 4097 // tasks might be pushing objects to it concurrently. 
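// Hence only the region claim is asserted below; the emptiness of the // shared structures is only guaranteed, and checked, once all tasks have // terminated. 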
4098 assert(_cm->out_of_regions(), 4099 "at this point we should be out of regions"); 4100 4101 if (_cm->verbose_low()) { 4102 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); 4103 } 4104 4105 // Try to reduce the number of available SATB buffers so that 4106 // remark has less work to do. 4107 drain_satb_buffers(); 4108 } 4109 4110 // Since we've done everything else, we can now totally drain the 4111 // local queue and global stack. 4112 drain_local_queue(false); 4113 drain_global_stack(false); 4114 4115 // Attempt work stealing from other tasks' queues. 4116 if (do_stealing && !has_aborted()) { 4117 // We have not aborted. This means that we have finished all that 4118 // we could. Let's try to do some stealing... 4119 4120 // We cannot check whether the global stack is empty, since other 4121 // tasks might be pushing objects to it concurrently. 4122 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4123 "only way to reach here"); 4124 4125 if (_cm->verbose_low()) { 4126 gclog_or_tty->print_cr("[%d] starting to steal", _task_id); 4127 } 4128 4129 while (!has_aborted()) { 4130 oop obj; 4131 statsOnly( ++_steal_attempts ); 4132 4133 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { 4134 if (_cm->verbose_medium()) { 4135 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", 4136 _task_id, (void*) obj); 4137 } 4138 4139 statsOnly( ++_steals ); 4140 4141 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4142 "any stolen object should be marked"); 4143 scan_object(obj); 4144 4145 // And since we're towards the end, let's totally drain the 4146 // local queue and global stack. 4147 drain_local_queue(false); 4148 drain_global_stack(false); 4149 } else { 4150 break; 4151 } 4152 } 4153 } 4154 4155 // If we are about to wrap up and go into termination, check if we 4156 // should raise the overflow flag. 4157 if (do_termination && !has_aborted()) { 4158 if (_cm->force_overflow()->should_force()) { 4159 _cm->set_has_overflown(); 4160 regular_clock_call(); 4161 } 4162 } 4163 4164 // We still haven't aborted. Now, let's try to get into the 4165 // termination protocol. 4166 if (do_termination && !has_aborted()) { 4167 // We cannot check whether the global stack is empty, since other 4168 // tasks might be concurrently pushing objects on it. 4169 // Separated the asserts so that we know which one fires. 4170 assert(_cm->out_of_regions(), "only way to reach here"); 4171 assert(_task_queue->size() == 0, "only way to reach here"); 4172 4173 if (_cm->verbose_low()) { 4174 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); 4175 } 4176 4177 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4178 // The CMTask class also extends the TerminatorTerminator class, 4179 // hence its should_exit_termination() method will also decide 4180 // whether to exit the termination protocol or not. 4181 bool finished = _cm->terminator()->offer_termination(this); 4182 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4183 _termination_time_ms += 4184 termination_end_time_ms - _termination_start_time_ms; 4185 4186 if (finished) { 4187 // We're all done. 4188 4189 if (_task_id == 0) { 4190 // let's allow task 0 to do this 4191 if (concurrent()) { 4192 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4193 // we need to set this to false before the next 4194 // safepoint. This way we ensure that the marking phase 4195 // doesn't observe any more heap expansions. 
4196 _cm->clear_concurrent_marking_in_progress(); 4197 } 4198 } 4199 4200 // We can now guarantee that the global stack is empty, since 4201 // all other tasks have finished. We separated the guarantees so 4202 // that, if a condition is false, we can immediately find out 4203 // which one. 4204 guarantee(_cm->out_of_regions(), "only way to reach here"); 4205 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4206 guarantee(_task_queue->size() == 0, "only way to reach here"); 4207 guarantee(!_cm->has_overflown(), "only way to reach here"); 4208 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4209 4210 if (_cm->verbose_low()) { 4211 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4212 } 4213 } else { 4214 // Apparently there's more work to do. Let's abort this task. The 4215 // caller will restart it and we can hopefully find more things to do. 4216 4217 if (_cm->verbose_low()) { 4218 gclog_or_tty->print_cr("[%d] apparently there is more work to do", 4219 _task_id); 4220 } 4221 4222 set_has_aborted(); 4223 statsOnly( ++_aborted_termination ); 4224 } 4225 } 4226 4227 // Mainly for debugging purposes to make sure that a pointer to the 4228 // closure which was statically allocated in this frame doesn't 4229 // escape it by accident. 4230 set_cm_oop_closure(NULL); 4231 double end_time_ms = os::elapsedVTime() * 1000.0; 4232 double elapsed_time_ms = end_time_ms - _start_time_ms; 4233 // Update the step history. 4234 _step_times_ms.add(elapsed_time_ms); 4235 4236 if (has_aborted()) { 4237 // The task was aborted for some reason. 4238 4239 statsOnly( ++_aborted ); 4240 4241 if (_has_timed_out) { 4242 double diff_ms = elapsed_time_ms - _time_target_ms; 4243 // Keep statistics of how well we did with respect to hitting 4244 // our target only if we actually timed out (if we aborted for 4245 // other reasons, then the results might get skewed). 4246 _marking_step_diffs_ms.add(diff_ms); 4247 } 4248 4249 if (_cm->has_overflown()) { 4250 // This is the interesting one. We aborted because a global 4251 // overflow was raised. This means we have to restart the 4252 // marking phase and start iterating over regions. However, in 4253 // order to do this we have to make sure that all tasks stop 4254 // what they are doing and re-initialise in a safe manner. We 4255 // will achieve this with the use of two barrier sync points. 4256 4257 if (_cm->verbose_low()) { 4258 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4259 } 4260 4261 _cm->enter_first_sync_barrier(_task_id); 4262 // When we exit this sync barrier we know that all tasks have 4263 // stopped doing marking work. So, it's now safe to 4264 // re-initialise our data structures. At the end of this method, 4265 // task 0 will clear the global data structures. 4266 4267 statsOnly( ++_aborted_overflow ); 4268 4269 // We clear the local state of this task... 4270 clear_region_fields(); 4271 4272 // ...and enter the second barrier. 4273 _cm->enter_second_sync_barrier(_task_id); 4274 // At this point everything has been re-initialised and we're 4275 // ready to restart. 
4276 } 4277 4278 if (_cm->verbose_low()) { 4279 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4280 "elapsed = %1.2lfms <<<<<<<<<<", 4281 _task_id, _time_target_ms, elapsed_time_ms); 4282 if (_cm->has_aborted()) { 4283 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", 4284 _task_id); 4285 } 4286 } 4287 } else { 4288 if (_cm->verbose_low()) { 4289 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4290 "elapsed = %1.2lfms <<<<<<<<<<", 4291 _task_id, _time_target_ms, elapsed_time_ms); 4292 } 4293 } 4294 4295 _claimed = false; 4296 } 4297 4298 CMTask::CMTask(int task_id, 4299 ConcurrentMark* cm, 4300 size_t* marked_bytes, 4301 BitMap* card_bm, 4302 CMTaskQueue* task_queue, 4303 CMTaskQueueSet* task_queues) 4304 : _g1h(G1CollectedHeap::heap()), 4305 _task_id(task_id), _cm(cm), 4306 _claimed(false), 4307 _nextMarkBitMap(NULL), _hash_seed(17), 4308 _task_queue(task_queue), 4309 _task_queues(task_queues), 4310 _cm_oop_closure(NULL), 4311 _marked_bytes_array(marked_bytes), 4312 _card_bm(card_bm) { 4313 guarantee(task_queue != NULL, "invariant"); 4314 guarantee(task_queues != NULL, "invariant"); 4315 4316 statsOnly( _clock_due_to_scanning = 0; 4317 _clock_due_to_marking = 0 ); 4318 4319 _marking_step_diffs_ms.add(0.5); 4320 } 4321 4322 // These are formatting macros that are used below to ensure 4323 // consistent formatting. The *_H_* versions are used to format the 4324 // header for a particular value and they should be kept consistent 4325 // with the corresponding macro. Also note that most of the macros add 4326 // the necessary white space (as a prefix) which makes them a bit 4327 // easier to compose. 4328 4329 // All the output lines are prefixed with this string to be able to 4330 // identify them easily in a large log file. 4331 #define G1PPRL_LINE_PREFIX "###" 4332 4333 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4334 #ifdef _LP64 4335 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4336 #else // _LP64 4337 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4338 #endif // _LP64 4339 4340 // For per-region info 4341 #define G1PPRL_TYPE_FORMAT " %-4s" 4342 #define G1PPRL_TYPE_H_FORMAT " %4s" 4343 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4344 #define G1PPRL_BYTE_H_FORMAT " %9s" 4345 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4346 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4347 4348 // For summary info 4349 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4350 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4351 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4352 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4353 4354 G1PrintRegionLivenessInfoClosure:: 4355 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4356 : _out(out), 4357 _total_used_bytes(0), _total_capacity_bytes(0), 4358 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4359 _hum_used_bytes(0), _hum_capacity_bytes(0), 4360 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) { 4361 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4362 MemRegion g1_committed = g1h->g1_committed(); 4363 MemRegion g1_reserved = g1h->g1_reserved(); 4364 double now = os::elapsedTime(); 4365 4366 // Print the header of the output. 
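// (For reference: every line below is emitted with the "###" prefix // defined above, so the whole liveness table can be extracted from a // large log by grepping for that prefix.) 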
4367 _out->cr(); 4368 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 4369 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 4370 G1PPRL_SUM_ADDR_FORMAT("committed") 4371 G1PPRL_SUM_ADDR_FORMAT("reserved") 4372 G1PPRL_SUM_BYTE_FORMAT("region-size"), 4373 g1_committed.start(), g1_committed.end(), 4374 g1_reserved.start(), g1_reserved.end(), 4375 HeapRegion::GrainBytes); 4376 _out->print_cr(G1PPRL_LINE_PREFIX); 4377 _out->print_cr(G1PPRL_LINE_PREFIX 4378 G1PPRL_TYPE_H_FORMAT 4379 G1PPRL_ADDR_BASE_H_FORMAT 4380 G1PPRL_BYTE_H_FORMAT 4381 G1PPRL_BYTE_H_FORMAT 4382 G1PPRL_BYTE_H_FORMAT 4383 G1PPRL_DOUBLE_H_FORMAT, 4384 "type", "address-range", 4385 "used", "prev-live", "next-live", "gc-eff"); 4386 _out->print_cr(G1PPRL_LINE_PREFIX 4387 G1PPRL_TYPE_H_FORMAT 4388 G1PPRL_ADDR_BASE_H_FORMAT 4389 G1PPRL_BYTE_H_FORMAT 4390 G1PPRL_BYTE_H_FORMAT 4391 G1PPRL_BYTE_H_FORMAT 4392 G1PPRL_DOUBLE_H_FORMAT, 4393 "", "", 4394 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)"); 4395 } 4396 4397 // It takes as a parameter a reference to one of the _hum_* fields; it 4398 // deduces the corresponding value for a region in a humongous region 4399 // series (either the region size, or what's left if the _hum_* field 4400 // is < the region size), and updates the _hum_* field accordingly. 4401 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 4402 size_t bytes = 0; 4403 // The > 0 check is to deal with the prev and next live bytes which 4404 // could be 0. 4405 if (*hum_bytes > 0) { 4406 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes); 4407 *hum_bytes -= bytes; 4408 } 4409 return bytes; 4410 } 4411 4412 // It deduces the values for a region in a humongous region series 4413 // from the _hum_* fields and updates those accordingly. It assumes 4414 // that the _hum_* fields have already been set up from the "starts 4415 // humongous" region and that we visit the regions in address order. 4416 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4417 size_t* capacity_bytes, 4418 size_t* prev_live_bytes, 4419 size_t* next_live_bytes) { 4420 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4421 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4422 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4423 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4424 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4425 } 4426 4427 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4428 const char* type = ""; 4429 HeapWord* bottom = r->bottom(); 4430 HeapWord* end = r->end(); 4431 size_t capacity_bytes = r->capacity(); 4432 size_t used_bytes = r->used(); 4433 size_t prev_live_bytes = r->live_bytes(); 4434 size_t next_live_bytes = r->next_live_bytes(); 4435 double gc_eff = r->gc_efficiency(); 4436 if (r->used() == 0) { 4437 type = "FREE"; 4438 } else if (r->is_survivor()) { 4439 type = "SURV"; 4440 } else if (r->is_young()) { 4441 type = "EDEN"; 4442 } else if (r->startsHumongous()) { 4443 type = "HUMS"; 4444 4445 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4446 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4447 "they should have been zeroed after the last time we used them"); 4448 // Set up the _hum_* fields. 
4449 _hum_capacity_bytes = capacity_bytes; 4450 _hum_used_bytes = used_bytes; 4451 _hum_prev_live_bytes = prev_live_bytes; 4452 _hum_next_live_bytes = next_live_bytes; 4453 get_hum_bytes(&used_bytes, &capacity_bytes, 4454 &prev_live_bytes, &next_live_bytes); 4455 end = bottom + HeapRegion::GrainWords; 4456 } else if (r->continuesHumongous()) { 4457 type = "HUMC"; 4458 get_hum_bytes(&used_bytes, &capacity_bytes, 4459 &prev_live_bytes, &next_live_bytes); 4460 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4461 } else { 4462 type = "OLD"; 4463 } 4464 4465 _total_used_bytes += used_bytes; 4466 _total_capacity_bytes += capacity_bytes; 4467 _total_prev_live_bytes += prev_live_bytes; 4468 _total_next_live_bytes += next_live_bytes; 4469 4470 // Print a line for this particular region. 4471 _out->print_cr(G1PPRL_LINE_PREFIX 4472 G1PPRL_TYPE_FORMAT 4473 G1PPRL_ADDR_BASE_FORMAT 4474 G1PPRL_BYTE_FORMAT 4475 G1PPRL_BYTE_FORMAT 4476 G1PPRL_BYTE_FORMAT 4477 G1PPRL_DOUBLE_FORMAT, 4478 type, bottom, end, 4479 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4480 4481 return false; 4482 } 4483 4484 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4485 // Print the footer of the output. 4486 _out->print_cr(G1PPRL_LINE_PREFIX); 4487 _out->print_cr(G1PPRL_LINE_PREFIX 4488 " SUMMARY" 4489 G1PPRL_SUM_MB_FORMAT("capacity") 4490 G1PPRL_SUM_MB_PERC_FORMAT("used") 4491 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4492 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4493 bytes_to_mb(_total_capacity_bytes), 4494 bytes_to_mb(_total_used_bytes), 4495 perc(_total_used_bytes, _total_capacity_bytes), 4496 bytes_to_mb(_total_prev_live_bytes), 4497 perc(_total_prev_live_bytes, _total_capacity_bytes), 4498 bytes_to_mb(_total_next_live_bytes), 4499 perc(_total_next_live_bytes, _total_capacity_bytes)); 4500 _out->cr(); 4501 }