/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// Concurrent Marking (CM) Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CM bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CM bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CM bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
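  // Illustrative example (assuming the common 64-bit defaults, not
  // spelled out in the original source): the constructor below passes
  // MinObjAlignment - 1 as the shifter, and MinObjAlignment is usually
  // one heap word, so _shifter == 0 and the bitmap keeps one bit per
  // HeapWord; the round-up is then a no-op. With a hypothetical
  // _shifter of 1 (one bit per two words), an addr three words past
  // _bmStartWord would round up to the fourth word, the next possible
  // object boundary the bitmap can represent.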
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(MemRegion, _base);
  }
}
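// A note on the lock-free protocol used by push_lock_free() and
// pop_lock_free() below (an illustrative summary, not text from the
// original source): Atomic::cmpxchg(new_value, dest, expected) stores
// new_value into *dest only if *dest still equals expected, and always
// returns the value it found there. A pusher that reads _index == k
// and then sees cmpxchg return k therefore owns slot k exclusively;
// any other return value means a concurrent push or pop won the race,
// and the loop retries with a fresh read of _index.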
void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}
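// Note (summarizing the interaction above, not text from the original
// source): invalidate_entries_into_cset() blanks stale entries with
// MemRegion(), i.e. a NULL start, rather than compacting the stack.
// That is why pop_lock_free() and the locked variants treat a
// NULL-start entry as a tombstone to be skipped.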
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones that still need to be traced
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, (size_t)1);
}

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL)
{
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads =     0;
    _max_parallel_marking_threads = 0;
    _sleep_factor             =   0.0;
    _marking_task_overhead    =   1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overrides G1MarkingOverheadPercent
      // if both are set
      _parallel_marking_threads = ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                              (double) os::processor_count();
      double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
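      // Worked example (illustrative flag values, not defaults taken
      // from the source): with G1MarkingOverheadPercent=10,
      // MaxGCPauseMillis=200, GCPauseIntervalMillis=1000 and 8
      // processors:
      //   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
      //   cpu_ratio             = 1 / 8              = 0.125
      //   marking_thread_num    = ceil(0.02 / 0.125) = 1
      //   marking_task_overhead = 0.02 / 1 * 8       = 0.16
      //   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
      // i.e. a single marking thread that, via the sleep logic in
      // CMConcurrentMarkingTask::work(), sleeps roughly 5.25x as long
      // as each marking step it performs.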
    } else {
      _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         (int) _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This would violate the invariant that the region stack is
  // empty at the beginning of remark. By ensuring that we do not
  // observe heap expansions after marking is complete, we do not
  // have this problem.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}
void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  clear_all_count_data();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or an evacuation
 * pause could occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */
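// A sketch of the overflow protocol the two barriers below implement
// (an editorial summary of the code, not text from the original
// source): when the global mark stack overflows, every active task
// aborts its do_marking_step() and enters the first barrier. Once all
// tasks have synced up, task 0 alone resets the global marking state
// (and, during remark, deliberately leaves the overflow flag set so
// that the pause is abandoned and concurrent marking restarts). Each
// task then resets its own local structures and enters the second
// barrier, after which marking can resume from a consistent state.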
void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

void ConcurrentMark::grayRoot(oop p, int worker_i) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr)) {
    par_mark_and_count(p, worker_i);
  }
}
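// Editorial note on terminology (not from the original source):
// "graying" an object here just means setting its bit in the next
// mark bitmap, so that it is treated as live and will be scanned.
// Objects that lie below the global finger additionally have to be
// pushed on the mark stack (see markAndGrayObjectIfNecessary below),
// because the marking tasks have already walked past their addresses.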
void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects) {
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;
  }

  HeapWord* finger = _finger;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);
  }

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low()) {
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());
    }

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low()) {
        gclog_or_tty->print_cr("[global] region stack has overflown.");
      }
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p, int worker_i) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.

    if (par_mark_and_count(p, worker_i)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects) {
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;
      }
      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above.
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low()) {
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
          }
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};
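// Illustrative duty-cycle arithmetic for the sleep logic above (an
// editorial example, not from the original source): a task that aborts
// its marking step after ~10ms of vtime, with the sleep_factor of 5.25
// from the constructor example, sleeps for 10 * 5.25 = 52.5ms before
// the next step. That yields roughly 10 / (10 + 52.5), about 16%, of a
// CPU per marking thread, matching the marking_task_overhead the
// factor was derived from.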
// Calculates the number of active workers for a concurrent
// phase.
size_t ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    size_t n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}
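// For orientation (an editorial summary, not text from the original
// source), the cycle these methods implement runs roughly as:
// initial-mark pause (checkpointRootsInitialPre/Post) ->
// markFromRoots() executing concurrently with the application ->
// remark pause (checkpointRootsFinal below) -> cleanup. If remark
// detects a mark stack overflow, _restart_for_overflow sends the
// cycle back to markFromRoots().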
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_and_clear_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* allow dirty */ true,
                       /* silent      */ false,
                       /* option      */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0
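// Worked example of the "card num" biasing used by the closures below
// (illustrative numbers, not from the original source): with a
// 512-byte card size (card_shift == 9) and a heap based at 0x80000000,
// the bottom card num is 0x80000000 >> 9 == 0x400000. An address such
// as 0x80001200 has card num (0x80001200 >> 9) == 0x400009, so it maps
// to index 9 in the counting card bitmaps.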
// Used to calculate the # live objects per region
// for verification purposes
class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  BitMap* _region_bm;
  BitMap* _card_bm;

  size_t _tot_words_done;
  size_t _tot_live;
  size_t _tot_used;

  size_t _region_marked_bytes;

  intptr_t _bottom_card_num;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
    BitMap::idx_t last_idx = last_card_num - _bottom_card_num;

    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i), "Should already be set.");
#else
      _card_bm->par_at_put(i, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
    _region_marked_bytes(0), _tot_words_done(0),
    _tot_live(0), _tot_used(0)
  {
    _bottom_card_num = cm->heap_bottom_card_num();
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
      size_t end_index = last_hr->hrs_index() + 1;
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->bottom();

    assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
           "Preconditions.");

    // Record the number of words we'll examine.
    size_t words_done = (nextTop - start);

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);

    size_t marked_bytes = 0;
    _region_marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.

    // The first card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t start_card_num = -1;

    // The last card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      oop obj = oop(start);
      int obj_sz = obj->size();

      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif // CARD_BM_TEST_MODE
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
    }

    // Handle the last range, if any.
    if (start_card_num != -1) {
      mark_card_num_range(start_card_num, last_card_num);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (nextTop < top) {
      start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
      last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);

      mark_card_num_range(start_card_num, last_card_num);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _tot_words_done = words_done;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
  size_t tot_words_done() const      { return _tot_words_done; }
  size_t tot_live() const            { return _tot_live; }
  size_t tot_used() const            { return _tot_used; }
};
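// Editorial note on the verification below (a summary, not text from
// the original source): the recomputed ("expected") bitmaps are
// compared against the data accumulated during marking with a
// one-directional check. Marking may legitimately record more live
// data than a recomputation sees (e.g. for objects copied during
// evacuation; see the G1ParCopyHelper::copy_to_survivor_space comment
// referenced below), so a failure is flagged only when something
// expected is missing from the actual data.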
// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  intptr_t _bottom_card_num; // Used for calculating bitmap indices

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _cm(cm),
    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0)
  {
    _bottom_card_num = cm->heap_bottom_card_num();
  }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Note that the calculated count data could be a subset of the
    // count data that was accumulated during marking. See the comment
    // in G1ParCopyHelper::copy_to_survivor_space for an explanation of
    // why.

    // Verify that _top_at_conc_count == ntams
    if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %d: top at conc count incorrect: expected "
                               PTR_FORMAT", actual: "PTR_FORMAT,
                               hr->hrs_index(), hr->next_top_at_mark_start(),
                               hr->top_at_conc_mark_count());
      }
      failures += 1;
    }

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %d: marked bytes mismatch: expected: "
                               SIZE_FORMAT", actual: "SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the expected bit is set and the actual is not set.
    BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %d: region bitmap mismatch: expected: %d, actual: %d",
                               hr->hrs_index(), expected, actual);
      }
      failures += 1;
    }
    // Verify that the card bit maps for the cards spanned by the current
    // region match. The set of offsets that have set bits in the expected
    // bitmap should be a subset of the offsets with set bits from the
    // actual calculated card bitmap.
    // Again it's more important that if the expected bit is set then the
    // actual bit be set.
    intptr_t start_card_num =
        intptr_t(uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift);
    intptr_t top_card_num =
        intptr_t(uintptr_t(hr->top()) >> CardTableModRefBS::card_shift);

    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
    BitMap::idx_t end_idx = top_card_num - _bottom_card_num;

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %d: card bitmap mismatch at idx %d: expected: %d, actual: %d",
                                 hr->hrs_index(), i, expected, actual);
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print("Region %d: bottom: "PTR_FORMAT", ntams: "
                          PTR_FORMAT", top: "PTR_FORMAT", end: "PTR_FORMAT,
                          hr->hrs_index(), hr->bottom(), hr->next_top_at_mark_start(),
                          hr->top(), hr->end());
      gclog_or_tty->print_cr(", marked_bytes: calc/actual "SIZE_FORMAT"/"SIZE_FORMAT,
                             _calc_cl.region_marked_bytes(),
                             hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  size_t _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0)
  {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
1647 if (G1CollectedHeap::use_parallel_gc_threads()) { 1648 assert( _g1h->workers()->active_workers() > 0, 1649 "Should have been previously set"); 1650 _n_workers = _g1h->workers()->active_workers(); 1651 } else { 1652 _n_workers = 1; 1653 } 1654 1655 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1656 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1657 1658 _verbose = _cm->verbose_medium(); 1659 } 1660 1661 void work(int worker_i) { 1662 assert((size_t) worker_i < _n_workers, "invariant"); 1663 1664 VerifyLiveObjectDataHRClosure verify_cl(_cm, 1665 _actual_region_bm, _actual_card_bm, 1666 _expected_region_bm, 1667 _expected_card_bm, 1668 _verbose); 1669 1670 if (G1CollectedHeap::use_parallel_gc_threads()) { 1671 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1672 worker_i, 1673 (int) _n_workers, 1674 HeapRegion::VerifyCountClaimValue); 1675 } else { 1676 _g1h->heap_region_iterate(&verify_cl); 1677 } 1678 1679 Atomic::add(verify_cl.failures(), &_failures); 1680 } 1681 1682 int failures() const { return _failures; } 1683 }; 1684 1685 // Final update of count data (during cleanup). 1686 // Adds [top_at_count, NTAMS) to the marked bytes for each 1687 // region. Sets the bits in the card bitmap corresponding 1688 // to the interval [top_at_count, top], and sets the 1689 // liveness bit for each region containing live data 1690 // in the region bitmap. 1691 1692 class FinalCountDataUpdateClosure: public HeapRegionClosure { 1693 ConcurrentMark* _cm; 1694 BitMap* _region_bm; 1695 BitMap* _card_bm; 1696 intptr_t _bottom_card_num; 1697 1698 size_t _total_live_bytes; 1699 size_t _total_used_bytes; 1700 size_t _total_words_done; 1701 1702 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { 1703 BitMap::idx_t start_idx = start_card_num - _bottom_card_num; 1704 BitMap::idx_t last_idx = last_card_num - _bottom_card_num; 1705 1706 // Inclusive bit range [start_idx, last_idx]. par_at_put_range 1707 // is exclusive so we have to also set the bit for last_idx. 1708 // Passing last_idx+1 to par_at_put_range would work in 1709 // most cases but could trip an OOB assertion. 1710 1711 if ((last_idx - start_idx) > 0) { 1712 _card_bm->par_at_put_range(start_idx, last_idx, true); 1713 } 1714 _card_bm->par_set_bit(last_idx); 1715 } 1716 1717 // It takes a region that's not empty (i.e., it has at least one 1718 // live object in it) and sets its corresponding bit on the region 1719 // bitmap to 1. If the region is "starts humongous" it will also set 1720 // to 1 the bits on the region bitmap that correspond to its 1721 // associated "continues humongous" regions. 1722 void set_bit_for_region(HeapRegion* hr) { 1723 assert(!hr->continuesHumongous(), "should have filtered those out"); 1724 1725 size_t index = hr->hrs_index(); 1726 if (!hr->startsHumongous()) { 1727 // Normal (non-humongous) case: just set the bit. 1728 _region_bm->par_set_bit((BitMap::idx_t) index); 1729 } else { 1730 // Starts humongous case: calculate how many regions are part of 1731 // this humongous region and then set the bit range.
1732 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1733 HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1); 1734 size_t end_index = last_hr->hrs_index() + 1; 1735 _region_bm->par_at_put_range((BitMap::idx_t) index, 1736 (BitMap::idx_t) end_index, true); 1737 } 1738 } 1739 1740 public: 1741 FinalCountDataUpdateClosure(ConcurrentMark* cm, 1742 BitMap* region_bm, 1743 BitMap* card_bm) : 1744 _cm(cm), _region_bm(region_bm), _card_bm(card_bm), 1745 _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0) 1746 { 1747 _bottom_card_num = cm->heap_bottom_card_num(); 1748 } 1749 1750 bool doHeapRegion(HeapRegion* hr) { 1751 1752 if (hr->continuesHumongous()) { 1753 // We will ignore these here and process them when their 1754 // associated "starts humongous" region is processed (see 1755 // set_bit_for_region()). Note that we cannot rely on their 1756 // associated "starts humongous" region to have their bit set to 1757 // 1 since, due to the region chunking in the parallel region 1758 // iteration, a "continues humongous" region might be visited 1759 // before its associated "starts humongous". 1760 return false; 1761 } 1762 1763 HeapWord* start = hr->top_at_conc_mark_count(); 1764 HeapWord* ntams = hr->next_top_at_mark_start(); 1765 HeapWord* top = hr->top(); 1766 1767 assert(hr->bottom() <= start && start <= hr->end() && 1768 hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1769 1770 size_t words_done = ntams - hr->bottom(); 1771 1772 intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); 1773 intptr_t last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift); 1774 1775 1776 if (start < ntams) { 1777 // Region was changed between remark and cleanup pauses. 1778 // We need to add (ntams - start) to the marked bytes 1779 // for this region, and set bits for the range 1780 // [ card_num(start), card_num(ntams) ) in the 1781 // card bitmap. 1782 size_t live_bytes = (ntams - start) * HeapWordSize; 1783 hr->add_to_marked_bytes(live_bytes); 1784 1785 // Record the new top at conc count 1786 hr->set_top_at_conc_mark_count(ntams); 1787 1788 // The setting of the bits in the card bitmap takes place below. 1789 } 1790 1791 // Mark the allocated-since-marking portion... 1792 if (ntams < top) { 1793 // This definitely means the region has live objects.
1794 set_bit_for_region(hr); 1795 } 1796 1797 // Now set the bits for [start, top] 1798 mark_card_num_range(start_card_num, last_card_num); 1799 1800 // Set the bit for the region if it contains live data 1801 if (hr->next_marked_bytes() > 0) { 1802 set_bit_for_region(hr); 1803 } 1804 1805 _total_words_done += words_done; 1806 _total_used_bytes += hr->used(); 1807 _total_live_bytes += hr->next_marked_bytes(); 1808 1809 return false; 1810 } 1811 1812 size_t total_words_done() const { return _total_words_done; } 1813 size_t total_live_bytes() const { return _total_live_bytes; } 1814 size_t total_used_bytes() const { return _total_used_bytes; } 1815 }; 1816 1817 class G1ParFinalCountTask: public AbstractGangTask { 1818 protected: 1819 G1CollectedHeap* _g1h; 1820 ConcurrentMark* _cm; 1821 BitMap* _actual_region_bm; 1822 BitMap* _actual_card_bm; 1823 1824 size_t _n_workers; 1825 1826 size_t *_live_bytes; 1827 size_t *_used_bytes; 1828 1829 public: 1830 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1831 : AbstractGangTask("G1 final counting"), 1832 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1833 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1834 _n_workers(0) 1835 { 1836 // Use the value already set as the number of active threads 1837 // in the call to run_task(). Needed for the allocation of 1838 // _live_bytes and _used_bytes. 1839 if (G1CollectedHeap::use_parallel_gc_threads()) { 1840 assert( _g1h->workers()->active_workers() > 0, 1841 "Should have been previously set"); 1842 _n_workers = _g1h->workers()->active_workers(); 1843 } else { 1844 _n_workers = 1; 1845 } 1846 1847 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); 1848 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); 1849 } 1850 1851 ~G1ParFinalCountTask() { 1852 FREE_C_HEAP_ARRAY(size_t, _live_bytes); 1853 FREE_C_HEAP_ARRAY(size_t, _used_bytes); 1854 } 1855 1856 void work(int worker_i) { 1857 assert((size_t) worker_i < _n_workers, "invariant"); 1858 1859 FinalCountDataUpdateClosure final_update_cl(_cm, 1860 _actual_region_bm, 1861 _actual_card_bm); 1862 1863 if (G1CollectedHeap::use_parallel_gc_threads()) { 1864 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1865 worker_i, 1866 (int) _n_workers, 1867 HeapRegion::FinalCountClaimValue); 1868 } else { 1869 _g1h->heap_region_iterate(&final_update_cl); 1870 } 1871 1872 _live_bytes[worker_i] = final_update_cl.total_live_bytes(); 1873 _used_bytes[worker_i] = final_update_cl.total_used_bytes(); 1874 } 1875 1876 size_t live_bytes() { 1877 size_t live_bytes = 0; 1878 for (size_t i = 0; i < _n_workers; ++i) 1879 live_bytes += _live_bytes[i]; 1880 return live_bytes; 1881 } 1882 1883 size_t used_bytes() { 1884 size_t used_bytes = 0; 1885 for (size_t i = 0; i < _n_workers; ++i) 1886 used_bytes += _used_bytes[i]; 1887 return used_bytes; 1888 } 1889 }; 1890 1891 class G1ParNoteEndTask; 1892 1893 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1894 G1CollectedHeap* _g1; 1895 int _worker_num; 1896 size_t _max_live_bytes; 1897 size_t _regions_claimed; 1898 size_t _freed_bytes; 1899 FreeRegionList* _local_cleanup_list; 1900 OldRegionSet* _old_proxy_set; 1901 HumongousRegionSet* _humongous_proxy_set; 1902 HRRSCleanupTask* _hrrs_cleanup_task; 1903 double _claimed_region_time; 1904 double _max_region_time; 1905 1906 public: 1907 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1908 int worker_num, 1909 FreeRegionList* local_cleanup_list, 1910 OldRegionSet* old_proxy_set, 1911 HumongousRegionSet* humongous_proxy_set, 1912 HRRSCleanupTask* 
hrrs_cleanup_task) : 1913 _g1(g1), _worker_num(worker_num), 1914 _max_live_bytes(0), _regions_claimed(0), 1915 _freed_bytes(0), 1916 _claimed_region_time(0.0), _max_region_time(0.0), 1917 _local_cleanup_list(local_cleanup_list), 1918 _old_proxy_set(old_proxy_set), 1919 _humongous_proxy_set(humongous_proxy_set), 1920 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1921 1922 size_t freed_bytes() { return _freed_bytes; } 1923 1924 bool doHeapRegion(HeapRegion *hr) { 1925 // We use a claim value of zero here because all regions 1926 // were claimed with value 1 in the FinalCount task. 1927 hr->reset_gc_time_stamp(); 1928 if (!hr->continuesHumongous()) { 1929 double start = os::elapsedTime(); 1930 _regions_claimed++; 1931 hr->note_end_of_marking(); 1932 _max_live_bytes += hr->max_live_bytes(); 1933 _g1->free_region_if_empty(hr, 1934 &_freed_bytes, 1935 _local_cleanup_list, 1936 _old_proxy_set, 1937 _humongous_proxy_set, 1938 _hrrs_cleanup_task, 1939 true /* par */); 1940 double region_time = (os::elapsedTime() - start); 1941 _claimed_region_time += region_time; 1942 if (region_time > _max_region_time) { 1943 _max_region_time = region_time; 1944 } 1945 } 1946 return false; 1947 } 1948 1949 size_t max_live_bytes() { return _max_live_bytes; } 1950 size_t regions_claimed() { return _regions_claimed; } 1951 double claimed_region_time_sec() { return _claimed_region_time; } 1952 double max_region_time_sec() { return _max_region_time; } 1953 }; 1954 1955 class G1ParNoteEndTask: public AbstractGangTask { 1956 friend class G1NoteEndOfConcMarkClosure; 1957 1958 protected: 1959 G1CollectedHeap* _g1h; 1960 size_t _max_live_bytes; 1961 size_t _freed_bytes; 1962 FreeRegionList* _cleanup_list; 1963 1964 public: 1965 G1ParNoteEndTask(G1CollectedHeap* g1h, 1966 FreeRegionList* cleanup_list) : 1967 AbstractGangTask("G1 note end"), _g1h(g1h), 1968 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1969 1970 void work(int i) { 1971 double start = os::elapsedTime(); 1972 FreeRegionList local_cleanup_list("Local Cleanup List"); 1973 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1974 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1975 HRRSCleanupTask hrrs_cleanup_task; 1976 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list, 1977 &old_proxy_set, 1978 &humongous_proxy_set, 1979 &hrrs_cleanup_task); 1980 if (G1CollectedHeap::use_parallel_gc_threads()) { 1981 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, 1982 _g1h->workers()->active_workers(), 1983 HeapRegion::NoteEndClaimValue); 1984 } else { 1985 _g1h->heap_region_iterate(&g1_note_end); 1986 } 1987 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1988 1989 // Now update the lists 1990 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1991 NULL /* free_list */, 1992 &old_proxy_set, 1993 &humongous_proxy_set, 1994 true /* par */); 1995 { 1996 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1997 _max_live_bytes += g1_note_end.max_live_bytes(); 1998 _freed_bytes += g1_note_end.freed_bytes(); 1999 2000 // If we iterate over the global cleanup list at the end of 2001 // cleanup to do this printing we cannot guarantee that we will only 2002 // generate output for the newly-reclaimed regions (the list 2003 // might not be empty at the beginning of cleanup; we might 2004 // still be working on its previous contents). So we do the 2005 // printing here, before we append the new regions to the global 2006 // cleanup list.
2007 2008 G1HRPrinter* hr_printer = _g1h->hr_printer(); 2009 if (hr_printer->is_active()) { 2010 HeapRegionLinkedListIterator iter(&local_cleanup_list); 2011 while (iter.more_available()) { 2012 HeapRegion* hr = iter.get_next(); 2013 hr_printer->cleanup(hr); 2014 } 2015 } 2016 2017 _cleanup_list->add_as_tail(&local_cleanup_list); 2018 assert(local_cleanup_list.is_empty(), "post-condition"); 2019 2020 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 2021 } 2022 double end = os::elapsedTime(); 2023 if (G1PrintParCleanupStats) { 2024 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " 2025 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", 2026 i, start, end, (end-start)*1000.0, 2027 g1_note_end.regions_claimed(), 2028 g1_note_end.claimed_region_time_sec()*1000.0, 2029 g1_note_end.max_region_time_sec()*1000.0); 2030 } 2031 } 2032 size_t max_live_bytes() { return _max_live_bytes; } 2033 size_t freed_bytes() { return _freed_bytes; } 2034 }; 2035 2036 class G1ParScrubRemSetTask: public AbstractGangTask { 2037 protected: 2038 G1RemSet* _g1rs; 2039 BitMap* _region_bm; 2040 BitMap* _card_bm; 2041 public: 2042 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 2043 BitMap* region_bm, BitMap* card_bm) : 2044 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 2045 _region_bm(region_bm), _card_bm(card_bm) 2046 {} 2047 2048 void work(int i) { 2049 if (G1CollectedHeap::use_parallel_gc_threads()) { 2050 _g1rs->scrub_par(_region_bm, _card_bm, i, 2051 HeapRegion::ScrubRemSetClaimValue); 2052 } else { 2053 _g1rs->scrub(_region_bm, _card_bm); 2054 } 2055 } 2056 2057 }; 2058 2059 void ConcurrentMark::cleanup() { 2060 // world is stopped at this checkpoint 2061 assert(SafepointSynchronize::is_at_safepoint(), 2062 "world should be stopped"); 2063 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2064 2065 // If a full collection has happened, we shouldn't do this. 2066 if (has_aborted()) { 2067 g1h->set_marking_complete(); // So bitmap clearing isn't confused 2068 return; 2069 } 2070 2071 HRSPhaseSetter x(HRSPhaseCleanup); 2072 g1h->verify_region_sets_optional(); 2073 2074 if (VerifyDuringGC) { 2075 HandleMark hm; // handle scope 2076 gclog_or_tty->print(" VerifyDuringGC:(before)"); 2077 Universe::heap()->prepare_for_verify(); 2078 Universe::verify(/* allow dirty */ true, 2079 /* silent */ false, 2080 /* option */ VerifyOption_G1UsePrevMarking); 2081 } 2082 2083 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2084 g1p->record_concurrent_mark_cleanup_start(); 2085 2086 double start = os::elapsedTime(); 2087 2088 HeapRegionRemSet::reset_for_cleanup_tasks(); 2089 2090 // Clear the global region bitmap - it will be filled as part 2091 // of the final counting task. 2092 _region_bm.clear(); 2093 2094 size_t n_workers; 2095 2096 // Do counting once more with the world stopped for good measure. 2097 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2098 2099 if (G1CollectedHeap::use_parallel_gc_threads()) { 2100 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2101 "sanity check"); 2102 2103 g1h->set_par_threads(); 2104 n_workers = g1h->n_par_threads(); 2105 assert(g1h->n_par_threads() == (int) n_workers, 2106 "Should not have been reset"); 2107 g1h->workers()->run_task(&g1_par_count_task); 2108 // Done with the parallel phase so reset to 0. 
2109 g1h->set_par_threads(0); 2110 2111 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2112 "sanity check"); 2113 } else { 2114 n_workers = 1; 2115 g1_par_count_task.work(0); 2116 } 2117 2118 if (VerifyDuringGC) { 2119 // Verify that the counting data accumulated during marking matches 2120 // that calculated by walking the marking bitmap. 2121 2122 // Bitmaps to hold expected values 2123 BitMap expected_region_bm(_region_bm.size(), false); 2124 BitMap expected_card_bm(_card_bm.size(), false); 2125 2126 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2127 &_region_bm, 2128 &_card_bm, 2129 &expected_region_bm, 2130 &expected_card_bm); 2131 2132 if (G1CollectedHeap::use_parallel_gc_threads()) { 2133 g1h->set_par_threads((int)n_workers); 2134 g1h->workers()->run_task(&g1_par_verify_task); 2135 // Done with the parallel phase so reset to 0. 2136 g1h->set_par_threads(0); 2137 2138 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2139 "sanity check"); 2140 } else { 2141 g1_par_verify_task.work(0); 2142 } 2143 2144 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2145 } 2146 2147 size_t known_garbage_bytes = 2148 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); 2149 g1p->set_known_garbage_bytes(known_garbage_bytes); 2150 2151 size_t start_used_bytes = g1h->used(); 2152 _at_least_one_mark_complete = true; 2153 g1h->set_marking_complete(); 2154 2155 ergo_verbose4(ErgoConcCycles, 2156 "finish cleanup", 2157 ergo_format_byte("occupancy") 2158 ergo_format_byte("capacity") 2159 ergo_format_byte_perc("known garbage"), 2160 start_used_bytes, g1h->capacity(), 2161 known_garbage_bytes, 2162 ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0); 2163 2164 double count_end = os::elapsedTime(); 2165 double this_final_counting_time = (count_end - start); 2166 if (G1PrintParCleanupStats) { 2167 gclog_or_tty->print_cr("Cleanup:"); 2168 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", 2169 this_final_counting_time*1000.0); 2170 } 2171 _total_counting_time += this_final_counting_time; 2172 2173 if (G1PrintRegionLivenessInfo) { 2174 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2175 _g1h->heap_region_iterate(&cl); 2176 } 2177 2178 // Install newly created mark bitmap as "prev". 2179 swapMarkBitMaps(); 2180 2181 g1h->reset_gc_time_stamp(); 2182 2183 // Note end of marking in all heap regions. 2184 double note_end_start = os::elapsedTime(); 2185 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2186 if (G1CollectedHeap::use_parallel_gc_threads()) { 2187 g1h->set_par_threads((int)n_workers); 2188 g1h->workers()->run_task(&g1_par_note_end_task); 2189 g1h->set_par_threads(0); 2190 2191 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2192 "sanity check"); 2193 } else { 2194 g1_par_note_end_task.work(0); 2195 } 2196 2197 if (!cleanup_list_is_empty()) { 2198 // The cleanup list is not empty, so we'll have to process it 2199 // concurrently. Notify anyone else that might be wanting free 2200 // regions that there will be more free regions coming soon. 2201 g1h->set_free_regions_coming(); 2202 } 2203 double note_end_end = os::elapsedTime(); 2204 if (G1PrintParCleanupStats) { 2205 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", 2206 (note_end_end - note_end_start)*1000.0); 2207 } 2208 2209 // Rem set scrubbing must be done before the record_concurrent_mark_cleanup_end() 2210 // call below, since it affects the metric by which we sort the heap regions.
2211 if (G1ScrubRemSets) { 2212 double rs_scrub_start = os::elapsedTime(); 2213 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2214 if (G1CollectedHeap::use_parallel_gc_threads()) { 2215 g1h->set_par_threads((int)n_workers); 2216 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2217 g1h->set_par_threads(0); 2218 2219 assert(g1h->check_heap_region_claim_values( 2220 HeapRegion::ScrubRemSetClaimValue), 2221 "sanity check"); 2222 } else { 2223 g1_par_scrub_rs_task.work(0); 2224 } 2225 2226 double rs_scrub_end = os::elapsedTime(); 2227 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2228 _total_rs_scrub_time += this_rs_scrub_time; 2229 } 2230 2231 // this will also free any regions totally full of garbage objects, 2232 // and sort the regions. 2233 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2234 2235 // Statistics. 2236 double end = os::elapsedTime(); 2237 _cleanup_times.add((end - start) * 1000.0); 2238 2239 // G1CollectedHeap::heap()->print(); 2240 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d", 2241 // G1CollectedHeap::heap()->get_gc_time_stamp()); 2242 2243 if (PrintGC || PrintGCDetails) { 2244 g1h->print_size_transition(gclog_or_tty, 2245 start_used_bytes, 2246 g1h->used(), 2247 g1h->capacity()); 2248 } 2249 2250 size_t cleaned_up_bytes = start_used_bytes - g1h->used(); 2251 g1p->decrease_known_garbage_bytes(cleaned_up_bytes); 2252 2253 // Cleanup will have freed any regions completely full of garbage. 2254 // Update the soft reference policy with the new heap occupancy. 2255 Universe::update_heap_info_at_gc(); 2256 2257 // We need to make this be a "collection" so any collection pause that 2258 // races with it goes around and waits for completeCleanup to finish. 2259 g1h->increment_total_collections(); 2260 2261 if (VerifyDuringGC) { 2262 HandleMark hm; // handle scope 2263 gclog_or_tty->print(" VerifyDuringGC:(after)"); 2264 Universe::heap()->prepare_for_verify(); 2265 Universe::verify(/* allow dirty */ true, 2266 /* silent */ false, 2267 /* option */ VerifyOption_G1UsePrevMarking); 2268 } 2269 2270 g1h->verify_region_sets_optional(); 2271 } 2272 2273 void ConcurrentMark::completeCleanup() { 2274 if (has_aborted()) return; 2275 2276 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2277 2278 _cleanup_list.verify_optional(); 2279 FreeRegionList tmp_free_list("Tmp Free List"); 2280 2281 if (G1ConcRegionFreeingVerbose) { 2282 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2283 "cleanup list has "SIZE_FORMAT" entries", 2284 _cleanup_list.length()); 2285 } 2286 2287 // No one else should be accessing the _cleanup_list at this point, 2288 // so it's not necessary to take any locks. 2289 while (!_cleanup_list.is_empty()) { 2290 HeapRegion* hr = _cleanup_list.remove_head(); 2291 assert(hr != NULL, "the list was not empty"); 2292 hr->par_clear(); 2293 tmp_free_list.add_as_tail(hr); 2294 2295 // Instead of adding one region at a time to the secondary_free_list, 2296 // we accumulate them in the local list and move them a few at a 2297 // time. This also cuts down on the number of notify_all() calls 2298 // we do during this process. We'll also append the local list when 2299 // _cleanup_list is empty (which means we just removed the last 2300 // region from the _cleanup_list).
2301 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2302 _cleanup_list.is_empty()) { 2303 if (G1ConcRegionFreeingVerbose) { 2304 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2305 "appending "SIZE_FORMAT" entries to the " 2306 "secondary_free_list, cleanup list still has " 2307 SIZE_FORMAT" entries", 2308 tmp_free_list.length(), 2309 _cleanup_list.length()); 2310 } 2311 2312 { 2313 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2314 g1h->secondary_free_list_add_as_tail(&tmp_free_list); 2315 SecondaryFreeList_lock->notify_all(); 2316 } 2317 2318 if (G1StressConcRegionFreeing) { 2319 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2320 os::sleep(Thread::current(), (jlong) 1, false); 2321 } 2322 } 2323 } 2324 } 2325 assert(tmp_free_list.is_empty(), "post-condition"); 2326 } 2327 2328 // Support closures for reference processing in G1 2329 2330 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2331 HeapWord* addr = (HeapWord*)obj; 2332 return addr != NULL && 2333 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2334 } 2335 2336 class G1CMKeepAliveClosure: public OopClosure { 2337 G1CollectedHeap* _g1; 2338 ConcurrentMark* _cm; 2339 public: 2340 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) : 2341 _g1(g1), _cm(cm) 2342 { 2343 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id"); 2344 } 2345 2346 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2347 virtual void do_oop( oop* p) { do_oop_work(p); } 2348 2349 template <class T> void do_oop_work(T* p) { 2350 oop obj = oopDesc::load_decode_heap_oop(p); 2351 HeapWord* addr = (HeapWord*)obj; 2352 2353 if (_cm->verbose_high()) { 2354 gclog_or_tty->print_cr("\t[0] we're looking at location " 2355 "*"PTR_FORMAT" = "PTR_FORMAT, 2356 p, (void*) obj); 2357 } 2358 2359 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { 2360 _cm->mark_and_count(obj); 2361 _cm->mark_stack_push(obj); 2362 } 2363 } 2364 }; 2365 2366 class G1CMDrainMarkingStackClosure: public VoidClosure { 2367 ConcurrentMark* _cm; 2368 CMMarkStack* _markStack; 2369 G1CMKeepAliveClosure* _oopClosure; 2370 public: 2371 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack, 2372 G1CMKeepAliveClosure* oopClosure) : 2373 _cm(cm), 2374 _markStack(markStack), 2375 _oopClosure(oopClosure) 2376 {} 2377 2378 void do_void() { 2379 _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false); 2380 } 2381 }; 2382 2383 // 'Keep Alive' closure used by parallel reference processing. 2384 // An instance of this closure is used in the parallel reference processing 2385 // code rather than an instance of G1CMKeepAliveClosure. We could have used 2386 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are 2387 // placed on the discovered ref lists only once, so we can mark and push with no 2388 // need to check whether the object has already been marked. Using the 2389 // G1CMKeepAliveClosure would mean, however, having all the worker threads 2390 // operating on the global mark stack. This means that an individual 2391 // worker would be doing lock-free pushes while it processes its own 2392 // discovered ref list followed by a drain call. If the discovered ref lists 2393 // are unbalanced then this could cause interference with the other 2394 // workers. Using a CMTask (and its embedded local data structures) 2395 // avoids that potential interference.
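// Schematically, a worker applying the closure below does something like
// the following (an illustrative sketch of the logic in do_oop_work(),
// not code that exists elsewhere in this file):
//
//   for each reference p on this worker's discovered ref list:
//     _task->deal_with_reference(obj);    // push onto task-local structures
//     if (--_ref_counter == 0) {          // every G1RefProcDrainInterval refs
//       do {
//         _task->do_marking_step(G1ConcMarkStepDurationMillis,
//                                false /* do_stealing */,
//                                false /* do_termination */);
//       } while (_task->has_aborted() && !_cm->has_overflown());
//       _ref_counter = _ref_counter_limit;
//     }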
2396 class G1CMParKeepAliveAndDrainClosure: public OopClosure { 2397 ConcurrentMark* _cm; 2398 CMTask* _task; 2399 int _ref_counter_limit; 2400 int _ref_counter; 2401 public: 2402 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) : 2403 _cm(cm), _task(task), 2404 _ref_counter_limit(G1RefProcDrainInterval) { 2405 assert(_ref_counter_limit > 0, "sanity"); 2406 _ref_counter = _ref_counter_limit; 2407 } 2408 2409 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2410 virtual void do_oop( oop* p) { do_oop_work(p); } 2411 2412 template <class T> void do_oop_work(T* p) { 2413 if (!_cm->has_overflown()) { 2414 oop obj = oopDesc::load_decode_heap_oop(p); 2415 if (_cm->verbose_high()) { 2416 gclog_or_tty->print_cr("\t[%d] we're looking at location " 2417 "*"PTR_FORMAT" = "PTR_FORMAT, 2418 _task->task_id(), p, (void*) obj); 2419 } 2420 2421 _task->deal_with_reference(obj); 2422 _ref_counter--; 2423 2424 if (_ref_counter == 0) { 2425 // We have dealt with _ref_counter_limit references, pushing them and objects 2426 // reachable from them onto the local stack (and possibly the global stack). 2427 // Call do_marking_step() to process these entries. We call the routine in a 2428 // loop, which we'll exit if there's nothing more to do (i.e. we're done 2429 // with the entries that we've pushed as a result of the deal_with_reference 2430 // calls above) or we overflow. 2431 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 2432 // while there may still be some work to do. (See the comment at the 2433 // beginning of CMTask::do_marking_step() for those conditions - one of which 2434 // is reaching the specified time target.) It is only when 2435 // CMTask::do_marking_step() returns without setting the has_aborted() flag 2436 // that the marking has completed. 2437 do { 2438 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2439 _task->do_marking_step(mark_step_duration_ms, 2440 false /* do_stealing */, 2441 false /* do_termination */); 2442 } while (_task->has_aborted() && !_cm->has_overflown()); 2443 _ref_counter = _ref_counter_limit; 2444 } 2445 } else { 2446 if (_cm->verbose_high()) { 2447 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id()); 2448 } 2449 } 2450 } 2451 }; 2452 2453 class G1CMParDrainMarkingStackClosure: public VoidClosure { 2454 ConcurrentMark* _cm; 2455 CMTask* _task; 2456 public: 2457 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) : 2458 _cm(cm), _task(task) 2459 {} 2460 2461 void do_void() { 2462 do { 2463 if (_cm->verbose_high()) { 2464 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", 2465 _task->task_id()); 2466 } 2467 2468 // We call CMTask::do_marking_step() to completely drain the local and 2469 // global marking stacks. The routine is called in a loop, which we'll 2470 // exit if there's nothing more to do (i.e. we've completely drained the 2471 // entries that were pushed as a result of applying the 2472 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref 2473 // lists above) or we overflow the global marking stack. 2474 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 2475 // while there may still be some work to do. (See the comment at the 2476 // beginning of CMTask::do_marking_step() for those conditions - one of which 2477 // is reaching the specified time target.) It is only when 2478 // CMTask::do_marking_step() returns without setting the has_aborted() flag 2479 // that the marking has completed.
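// (The effectively infinite time target passed to do_marking_step() below
// disables the time-based abort, so this loop exits only when the stacks
// are completely drained or the global mark stack has overflown.)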
2480 2481 _task->do_marking_step(1000000000.0 /* something very large */, 2482 true /* do_stealing */, 2483 true /* do_termination */); 2484 } while (_task->has_aborted() && !_cm->has_overflown()); 2485 } 2486 }; 2487 2488 // Implementation of AbstractRefProcTaskExecutor for parallel 2489 // reference processing at the end of G1 concurrent marking 2490 2491 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2492 private: 2493 G1CollectedHeap* _g1h; 2494 ConcurrentMark* _cm; 2495 WorkGang* _workers; 2496 int _active_workers; 2497 2498 public: 2499 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2500 ConcurrentMark* cm, 2501 WorkGang* workers, 2502 int n_workers) : 2503 _g1h(g1h), _cm(cm), 2504 _workers(workers), _active_workers(n_workers) { } 2505 2506 // Executes the given task using concurrent marking worker threads. 2507 virtual void execute(ProcessTask& task); 2508 virtual void execute(EnqueueTask& task); 2509 }; 2510 2511 class G1CMRefProcTaskProxy: public AbstractGangTask { 2512 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2513 ProcessTask& _proc_task; 2514 G1CollectedHeap* _g1h; 2515 ConcurrentMark* _cm; 2516 2517 public: 2518 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2519 G1CollectedHeap* g1h, 2520 ConcurrentMark* cm) : 2521 AbstractGangTask("Process reference objects in parallel"), 2522 _proc_task(proc_task), _g1h(g1h), _cm(cm) { } 2523 2524 virtual void work(int i) { 2525 CMTask* marking_task = _cm->task(i); 2526 G1CMIsAliveClosure g1_is_alive(_g1h); 2527 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task); 2528 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2529 2530 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2531 } 2532 }; 2533 2534 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2535 assert(_workers != NULL, "Need parallel worker threads."); 2536 2537 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2538 2539 // We need to reset the phase for each task execution so that 2540 // the termination protocol of CMTask::do_marking_step works. 2541 _cm->set_phase(_active_workers, false /* concurrent */); 2542 _g1h->set_par_threads(_active_workers); 2543 _workers->run_task(&proc_task_proxy); 2544 _g1h->set_par_threads(0); 2545 } 2546 2547 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2548 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2549 EnqueueTask& _enq_task; 2550 2551 public: 2552 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2553 AbstractGangTask("Enqueue reference objects in parallel"), 2554 _enq_task(enq_task) { } 2555 2556 virtual void work(int i) { 2557 _enq_task.work(i); 2558 } 2559 }; 2560 2561 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2562 assert(_workers != NULL, "Need parallel worker threads."); 2563 2564 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2565 2566 _g1h->set_par_threads(_active_workers); 2567 _workers->run_task(&enq_task_proxy); 2568 _g1h->set_par_threads(0); 2569 } 2570 2571 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2572 ResourceMark rm; 2573 HandleMark hm; 2574 2575 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2576 2577 // Is alive closure. 2578 G1CMIsAliveClosure g1_is_alive(g1h); 2579 2580 // Inner scope to exclude the cleaning of the string and symbol 2581 // tables from the displayed time. 
{ 2583 bool verbose = PrintGC && PrintGCDetails; 2584 if (verbose) { 2585 gclog_or_tty->put(' '); 2586 } 2587 TraceTime t("GC ref-proc", verbose, false, gclog_or_tty); 2588 2589 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2590 2591 // See the comment in G1CollectedHeap::ref_processing_init() 2592 // about how reference processing currently works in G1. 2593 2594 // Process weak references. 2595 rp->setup_policy(clear_all_soft_refs); 2596 assert(_markStack.isEmpty(), "mark stack should be empty"); 2597 2598 G1CMKeepAliveClosure g1_keep_alive(g1h, this); 2599 G1CMDrainMarkingStackClosure 2600 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive); 2601 2602 // We use the work gang from the G1CollectedHeap and we utilize all 2603 // the worker threads. 2604 int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1; 2605 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1); 2606 2607 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2608 g1h->workers(), active_workers); 2609 2610 if (rp->processing_is_mt()) { 2611 // Set the degree of MT here. If the discovery is done MT, there 2612 // may have been a different number of threads doing the discovery 2613 // and a different number of discovered lists may have Ref objects. 2614 // That is OK as long as the Reference lists are balanced (see 2615 // balance_all_queues() and balance_queues()). 2616 rp->set_active_mt_degree(active_workers); 2617 2618 rp->process_discovered_references(&g1_is_alive, 2619 &g1_keep_alive, 2620 &g1_drain_mark_stack, 2621 &par_task_executor); 2622 2623 // The work routines of the parallel keep_alive and drain_marking_stack 2624 // will set the has_overflown flag if we overflow the global marking 2625 // stack. 2626 } else { 2627 rp->process_discovered_references(&g1_is_alive, 2628 &g1_keep_alive, 2629 &g1_drain_mark_stack, 2630 NULL); 2631 } 2632 2633 assert(_markStack.overflow() || _markStack.isEmpty(), 2634 "mark stack should be empty (unless it overflowed)"); 2635 if (_markStack.overflow()) { 2636 // Should have been done already when we tried to push an 2637 // entry onto the global mark stack. But let's do it again. 2638 set_has_overflown(); 2639 } 2640 2641 if (rp->processing_is_mt()) { 2642 assert(rp->num_q() == active_workers, "why not"); 2643 rp->enqueue_discovered_references(&par_task_executor); 2644 } else { 2645 rp->enqueue_discovered_references(); 2646 } 2647 2648 rp->verify_no_references_recorded(); 2649 assert(!rp->discovery_enabled(), "Post condition"); 2650 } 2651 2652 // Now clean up stale oops in StringTable 2653 StringTable::unlink(&g1_is_alive); 2654 // Clean up unreferenced symbols in symbol table. 2655 SymbolTable::unlink(); 2656 } 2657 2658 void ConcurrentMark::swapMarkBitMaps() { 2659 CMBitMapRO* temp = _prevMarkBitMap; 2660 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2661 _nextMarkBitMap = (CMBitMap*) temp; 2662 } 2663 2664 class CMRemarkTask: public AbstractGangTask { 2665 private: 2666 ConcurrentMark *_cm; 2667 2668 public: 2669 void work(int worker_i) { 2670 // Since all available tasks are actually started, we should 2671 // only proceed if we're supposed to be active. 2672 if ((size_t)worker_i < _cm->active_tasks()) { 2673 CMTask* task = _cm->task(worker_i); 2674 task->record_start_time(); 2675 do { 2676 task->do_marking_step(1000000000.0 /* something very large */, 2677 true /* do_stealing */, 2678 true /* do_termination */); 2679 } while (task->has_aborted() && !_cm->has_overflown()); 2680 // If we overflow, then we do not want to restart.
We instead 2681 // want to abort remark and do concurrent marking again. 2682 task->record_end_time(); 2683 } 2684 } 2685 2686 CMRemarkTask(ConcurrentMark* cm, int active_workers) : 2687 AbstractGangTask("Par Remark"), _cm(cm) { 2688 _cm->terminator()->reset_for_reuse(active_workers); 2689 } 2690 }; 2691 2692 void ConcurrentMark::checkpointRootsFinalWork() { 2693 ResourceMark rm; 2694 HandleMark hm; 2695 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2696 2697 g1h->ensure_parsability(false); 2698 2699 if (G1CollectedHeap::use_parallel_gc_threads()) { 2700 G1CollectedHeap::StrongRootsScope srs(g1h); 2701 // this is remark, so we'll use up all active threads 2702 int active_workers = g1h->workers()->active_workers(); 2703 if (active_workers == 0) { 2704 assert(active_workers > 0, "Should have been set earlier"); 2705 active_workers = ParallelGCThreads; 2706 g1h->workers()->set_active_workers(active_workers); 2707 } 2708 set_phase(active_workers, false /* concurrent */); 2709 // Leave _parallel_marking_threads at its 2710 // value originally calculated in the ConcurrentMark 2711 // constructor and pass values of the active workers 2712 // through the gang in the task. 2713 2714 CMRemarkTask remarkTask(this, active_workers); 2715 g1h->set_par_threads(active_workers); 2716 g1h->workers()->run_task(&remarkTask); 2717 g1h->set_par_threads(0); 2718 } else { 2719 G1CollectedHeap::StrongRootsScope srs(g1h); 2720 // this is remark, so we'll use up all available threads 2721 int active_workers = 1; 2722 set_phase(active_workers, false /* concurrent */); 2723 2724 CMRemarkTask remarkTask(this, active_workers); 2725 // We will start all available threads, even if we decide that the 2726 // active_workers will be fewer. The extra ones will just bail out 2727 // immediately.
2728 remarkTask.work(0); 2729 } 2730 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2731 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2732 2733 print_stats(); 2734 2735 #if VERIFY_OBJS_PROCESSED 2736 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2737 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2738 _scan_obj_cl.objs_processed, 2739 ThreadLocalObjQueue::objs_enqueued); 2740 guarantee(_scan_obj_cl.objs_processed == 2741 ThreadLocalObjQueue::objs_enqueued, 2742 "Different number of objs processed and enqueued."); 2743 } 2744 #endif 2745 } 2746 2747 #ifndef PRODUCT 2748 2749 class PrintReachableOopClosure: public OopClosure { 2750 private: 2751 G1CollectedHeap* _g1h; 2752 outputStream* _out; 2753 VerifyOption _vo; 2754 bool _all; 2755 2756 public: 2757 PrintReachableOopClosure(outputStream* out, 2758 VerifyOption vo, 2759 bool all) : 2760 _g1h(G1CollectedHeap::heap()), 2761 _out(out), _vo(vo), _all(all) { } 2762 2763 void do_oop(narrowOop* p) { do_oop_work(p); } 2764 void do_oop( oop* p) { do_oop_work(p); } 2765 2766 template <class T> void do_oop_work(T* p) { 2767 oop obj = oopDesc::load_decode_heap_oop(p); 2768 const char* str = NULL; 2769 const char* str2 = ""; 2770 2771 if (obj == NULL) { 2772 str = ""; 2773 } else if (!_g1h->is_in_g1_reserved(obj)) { 2774 str = " O"; 2775 } else { 2776 HeapRegion* hr = _g1h->heap_region_containing(obj); 2777 guarantee(hr != NULL, "invariant"); 2778 bool over_tams = false; 2779 bool marked = false; 2780 2781 switch (_vo) { 2782 case VerifyOption_G1UsePrevMarking: 2783 over_tams = hr->obj_allocated_since_prev_marking(obj); 2784 marked = _g1h->isMarkedPrev(obj); 2785 break; 2786 case VerifyOption_G1UseNextMarking: 2787 over_tams = hr->obj_allocated_since_next_marking(obj); 2788 marked = _g1h->isMarkedNext(obj); 2789 break; 2790 case VerifyOption_G1UseMarkWord: 2791 marked = obj->is_gc_marked(); 2792 break; 2793 default: 2794 ShouldNotReachHere(); 2795 } 2796 2797 if (over_tams) { 2798 str = " >"; 2799 if (marked) { 2800 str2 = " AND MARKED"; 2801 } 2802 } else if (marked) { 2803 str = " M"; 2804 } else { 2805 str = " NOT"; 2806 } 2807 } 2808 2809 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2810 p, (void*) obj, str, str2); 2811 } 2812 }; 2813 2814 class PrintReachableObjectClosure : public ObjectClosure { 2815 private: 2816 G1CollectedHeap* _g1h; 2817 outputStream* _out; 2818 VerifyOption _vo; 2819 bool _all; 2820 HeapRegion* _hr; 2821 2822 public: 2823 PrintReachableObjectClosure(outputStream* out, 2824 VerifyOption vo, 2825 bool all, 2826 HeapRegion* hr) : 2827 _g1h(G1CollectedHeap::heap()), 2828 _out(out), _vo(vo), _all(all), _hr(hr) { } 2829 2830 void do_object(oop o) { 2831 bool over_tams = false; 2832 bool marked = false; 2833 2834 switch (_vo) { 2835 case VerifyOption_G1UsePrevMarking: 2836 over_tams = _hr->obj_allocated_since_prev_marking(o); 2837 marked = _g1h->isMarkedPrev(o); 2838 break; 2839 case VerifyOption_G1UseNextMarking: 2840 over_tams = _hr->obj_allocated_since_next_marking(o); 2841 marked = _g1h->isMarkedNext(o); 2842 break; 2843 case VerifyOption_G1UseMarkWord: 2844 marked = o->is_gc_marked(); 2845 break; 2846 default: 2847 ShouldNotReachHere(); 2848 } 2849 bool print_it = _all || over_tams || marked; 2850 2851 if (print_it) { 2852 _out->print_cr(" "PTR_FORMAT"%s", 2853 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2854 PrintReachableOopClosure oopCl(_out, _vo, _all); 2855 o->oop_iterate(&oopCl); 2856 } 2857 } 2858 }; 2859 2860 class PrintReachableRegionClosure : public HeapRegionClosure { 2861 private: 2862 outputStream* _out; 2863 VerifyOption _vo; 2864 bool _all; 2865 2866 public: 2867 bool doHeapRegion(HeapRegion* hr) { 2868 HeapWord* b = hr->bottom(); 2869 HeapWord* e = hr->end(); 2870 HeapWord* t = hr->top(); 2871 HeapWord* p = NULL; 2872 2873 switch (_vo) { 2874 case VerifyOption_G1UsePrevMarking: 2875 p = hr->prev_top_at_mark_start(); 2876 break; 2877 case VerifyOption_G1UseNextMarking: 2878 p = hr->next_top_at_mark_start(); 2879 break; 2880 case VerifyOption_G1UseMarkWord: 2881 // When we are verifying marking using the mark word 2882 // TAMS has no relevance. 2883 assert(p == NULL, "post-condition"); 2884 break; 2885 default: 2886 ShouldNotReachHere(); 2887 } 2888 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2889 "TAMS: "PTR_FORMAT, b, e, t, p); 2890 _out->cr(); 2891 2892 HeapWord* from = b; 2893 HeapWord* to = t; 2894 2895 if (to > from) { 2896 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2897 _out->cr(); 2898 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2899 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2900 _out->cr(); 2901 } 2902 2903 return false; 2904 } 2905 2906 PrintReachableRegionClosure(outputStream* out, 2907 VerifyOption vo, 2908 bool all) : 2909 _out(out), _vo(vo), _all(all) { } 2910 }; 2911 2912 static const char* verify_option_to_tams(VerifyOption vo) { 2913 switch (vo) { 2914 case VerifyOption_G1UsePrevMarking: 2915 return "PTAMS"; 2916 case VerifyOption_G1UseNextMarking: 2917 return "NTAMS"; 2918 default: 2919 return "NONE"; 2920 } 2921 } 2922 2923 void ConcurrentMark::print_reachable(const char* str, 2924 VerifyOption vo, 2925 bool all) { 2926 gclog_or_tty->cr(); 2927 gclog_or_tty->print_cr("== Doing heap dump... "); 2928 2929 if (G1PrintReachableBaseFile == NULL) { 2930 gclog_or_tty->print_cr(" #### error: no base file defined"); 2931 return; 2932 } 2933 2934 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2935 (JVM_MAXPATHLEN - 1)) { 2936 gclog_or_tty->print_cr(" #### error: file name too long"); 2937 return; 2938 } 2939 2940 char file_name[JVM_MAXPATHLEN]; 2941 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2942 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2943 2944 fileStream fout(file_name); 2945 if (!fout.is_open()) { 2946 gclog_or_tty->print_cr(" #### error: could not open file"); 2947 return; 2948 } 2949 2950 outputStream* out = &fout; 2951 out->print_cr("-- USING %s", verify_option_to_tams(vo)); 2952 out->cr(); 2953 2954 out->print_cr("--- ITERATING OVER REGIONS"); 2955 out->cr(); 2956 PrintReachableRegionClosure rcl(out, vo, all); 2957 _g1h->heap_region_iterate(&rcl); 2958 out->cr(); 2959 2960 gclog_or_tty->print_cr(" done"); 2961 gclog_or_tty->flush(); 2962 } 2963 2964 #endif // PRODUCT 2965 2966 // This note is for drainAllSATBBuffers and the code in between. 2967 // In the future we could reuse a task to do this work during an 2968 // evacuation pause (since now tasks are not active and can be claimed 2969 // during an evacuation pause). This was a late change to the code and 2970 // is currently not being taken advantage of. 
2971 2972 class CMGlobalObjectClosure : public ObjectClosure { 2973 private: 2974 ConcurrentMark* _cm; 2975 2976 public: 2977 void do_object(oop obj) { 2978 _cm->deal_with_reference(obj, 0); 2979 } 2980 2981 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } 2982 }; 2983 2984 void ConcurrentMark::deal_with_reference(oop obj, int worker_i) { 2985 if (verbose_high()) { 2986 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, 2987 (void*) obj); 2988 } 2989 2990 HeapWord* objAddr = (HeapWord*) obj; 2991 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error"); 2992 if (_g1h->is_in_g1_reserved(objAddr)) { 2993 assert(obj != NULL, "null check is implicit"); 2994 if (!_nextMarkBitMap->isMarked(objAddr)) { 2995 // Only get the containing region if the object is not marked on the 2996 // bitmap (otherwise, it's a waste of time since we won't do 2997 // anything with it). 2998 HeapRegion* hr = _g1h->heap_region_containing_raw(obj); 2999 if (!hr->obj_allocated_since_next_marking(obj)) { 3000 if (verbose_high()) { 3001 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " 3002 "marked", (void*) obj); 3003 } 3004 3005 // we need to mark it first 3006 if (par_mark_and_count(obj, hr, worker_i)) { 3007 // No OrderAccess::store_load() is needed. It is implicit in the 3008 // CAS done in the call to CMBitMap::parMark() in the above 3009 // routine. 3010 HeapWord* finger = _finger; 3011 if (objAddr < finger) { 3012 if (verbose_high()) { 3013 gclog_or_tty->print_cr("[global] below the global finger " 3014 "("PTR_FORMAT"), pushing it", finger); 3015 } 3016 if (!mark_stack_push(obj)) { 3017 if (verbose_low()) { 3018 gclog_or_tty->print_cr("[global] global stack overflow during " 3019 "deal_with_reference"); 3020 } 3021 } 3022 } 3023 } 3024 } 3025 } 3026 } 3027 } 3028 3029 void ConcurrentMark::drainAllSATBBuffers() { 3030 CMGlobalObjectClosure oc(this); 3031 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3032 satb_mq_set.set_closure(&oc); 3033 3034 while (satb_mq_set.apply_closure_to_completed_buffer()) { 3035 if (verbose_medium()) { 3036 gclog_or_tty->print_cr("[global] processed an SATB buffer"); 3037 } 3038 } 3039 3040 // no need to check whether we should do this, as this is only 3041 // called during an evacuation pause 3042 satb_mq_set.iterate_closure_all_threads(); 3043 3044 satb_mq_set.set_closure(NULL); 3045 assert(satb_mq_set.completed_buffers_num() == 0, "invariant"); 3046 } 3047 3048 void ConcurrentMark::markPrev(oop p) { 3049 // Note we are overriding the read-only view of the prev map here, via 3050 // the cast. 3051 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); 3052 } 3053 3054 void ConcurrentMark::clear_mark(oop p) { 3055 assert(p != NULL && p->is_oop(), "expected an oop"); 3056 HeapWord* addr = (HeapWord*)p; 3057 assert(addr >= _nextMarkBitMap->startWord() && 3058 addr < _nextMarkBitMap->endWord(), "in a region"); 3059 3060 _nextMarkBitMap->clear(addr); 3061 } 3062 3063 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) { 3064 // Note we are overriding the read-only view of the prev map here, via 3065 // the cast. 3066 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 3067 _nextMarkBitMap->clearRange(mr); 3068 } 3069 3070 HeapRegion* 3071 ConcurrentMark::claim_region(int task_num) { 3072 // "checkpoint" the finger 3073 HeapWord* finger = _finger; 3074 3075 // _heap_end will not change underneath our feet; it only changes at 3076 // yield points.
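// In outline, the claim protocol implemented below is (a sketch for
// illustration only, restating the CAS on the global finger):
//
//   HeapWord* finger = _finger;          // checkpoint the global finger
//   HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
//   if (res == finger) {
//     // success: we claimed the region under the finger, [bottom, end)
//   } else {
//     finger = _finger;                  // somebody beat us to it; retry
//   }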
3077 while (finger < _heap_end) { 3078 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 3079 3080 // Note on how this code handles humongous regions. In the 3081 // normal case the finger will reach the start of a "starts 3082 // humongous" (SH) region. Its end will either be the end of the 3083 // last "continues humongous" (CH) region in the sequence, or the 3084 // standard end of the SH region (if the SH is the only region in 3085 // the sequence). That way claim_region() will skip over the CH 3086 // regions. However, there is a subtle race between a CM thread 3087 // executing this method and a mutator thread doing a humongous 3088 // object allocation. The two are not mutually exclusive as the CM 3089 // thread does not need to hold the Heap_lock when it gets 3090 // here. So there is a chance that claim_region() will come across 3091 // a free region that's in the process of becoming a SH or a CH 3092 // region. In the former case, it will either 3093 // a) miss the update to the region's end, in which case it will 3094 // visit every subsequent CH region, find their bitmaps 3095 // empty, and do nothing, or 3096 // b) observe the update of the region's end (in which case 3097 // it will skip the subsequent CH regions). 3098 // If it comes across a region that suddenly becomes CH, the 3099 // scenario will be similar to b). So, the race between 3100 // claim_region() and a humongous object allocation might force us 3101 // to do a bit of unnecessary work (due to some unnecessary bitmap 3102 // iterations) but it should not introduce any correctness issues. 3103 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 3104 HeapWord* bottom = curr_region->bottom(); 3105 HeapWord* end = curr_region->end(); 3106 HeapWord* limit = curr_region->next_top_at_mark_start(); 3107 3108 if (verbose_low()) { 3109 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " 3110 "["PTR_FORMAT", "PTR_FORMAT"), " 3111 "limit = "PTR_FORMAT, 3112 task_num, curr_region, bottom, end, limit); 3113 } 3114 3115 // Is the gap between reading the finger and doing the CAS too long? 3116 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 3117 if (res == finger) { 3118 // we succeeded 3119 3120 // notice that _finger == end cannot be guaranteed here since 3121 // someone else might have moved the finger even further 3122 assert(_finger >= end, "the finger should have moved forward"); 3123 3124 if (verbose_low()) { 3125 gclog_or_tty->print_cr("[%d] we were successful with region = " 3126 PTR_FORMAT, task_num, curr_region); 3127 } 3128 3129 if (limit > bottom) { 3130 if (verbose_low()) { 3131 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " 3132 "returning it ", task_num, curr_region); 3133 } 3134 return curr_region; 3135 } else { 3136 assert(limit == bottom, 3137 "the region limit should be at bottom"); 3138 if (verbose_low()) { 3139 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " 3140 "returning NULL", task_num, curr_region); 3141 } 3142 // we return NULL and the caller should try calling 3143 // claim_region() again.
3144 return NULL; 3145 } 3146 } else { 3147 assert(_finger > finger, "the finger should have moved forward"); 3148 if (verbose_low()) { 3149 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 3150 "global finger = "PTR_FORMAT", " 3151 "our finger = "PTR_FORMAT, 3152 task_num, _finger, finger); 3153 } 3154 3155 // read it again 3156 finger = _finger; 3157 } 3158 } 3159 3160 return NULL; 3161 } 3162 3163 bool ConcurrentMark::invalidate_aborted_regions_in_cset() { 3164 bool result = false; 3165 for (int i = 0; i < (int)_max_task_num; ++i) { 3166 CMTask* the_task = _tasks[i]; 3167 MemRegion mr = the_task->aborted_region(); 3168 if (mr.start() != NULL) { 3169 assert(mr.end() != NULL, "invariant"); 3170 assert(mr.word_size() > 0, "invariant"); 3171 HeapRegion* hr = _g1h->heap_region_containing(mr.start()); 3172 assert(hr != NULL, "invariant"); 3173 if (hr->in_collection_set()) { 3174 // The region points into the collection set 3175 the_task->set_aborted_region(MemRegion()); 3176 result = true; 3177 } 3178 } 3179 } 3180 return result; 3181 } 3182 3183 bool ConcurrentMark::has_aborted_regions() { 3184 for (int i = 0; i < (int)_max_task_num; ++i) { 3185 CMTask* the_task = _tasks[i]; 3186 MemRegion mr = the_task->aborted_region(); 3187 if (mr.start() != NULL) { 3188 assert(mr.end() != NULL, "invariant"); 3189 assert(mr.word_size() > 0, "invariant"); 3190 return true; 3191 } 3192 } 3193 return false; 3194 } 3195 3196 void ConcurrentMark::oops_do(OopClosure* cl) { 3197 if (_markStack.size() > 0 && verbose_low()) { 3198 gclog_or_tty->print_cr("[global] scanning the global marking stack, " 3199 "size = %d", _markStack.size()); 3200 } 3201 // we first iterate over the contents of the mark stack... 3202 _markStack.oops_do(cl); 3203 3204 for (int i = 0; i < (int)_max_task_num; ++i) { 3205 OopTaskQueue* queue = _task_queues->queue((int)i); 3206 3207 if (queue->size() > 0 && verbose_low()) { 3208 gclog_or_tty->print_cr("[global] scanning task queue of task %d, " 3209 "size = %d", i, queue->size()); 3210 } 3211 3212 // ...then over the contents of all the task queues. 3213 queue->oops_do(cl); 3214 } 3215 3216 // Invalidate any entries in the region stack that 3217 // point into the collection set. 3218 if (_regionStack.invalidate_entries_into_cset()) { 3219 // otherwise, any gray objects copied during the evacuation pause 3220 // might not be visited. 3221 assert(_should_gray_objects, "invariant"); 3222 } 3223 3224 // Invalidate any aborted regions, recorded in the individual CM 3225 // tasks, that point into the collection set. 3226 if (invalidate_aborted_regions_in_cset()) { 3227 // otherwise, any gray objects copied during the evacuation pause 3228 // might not be visited. 3229 assert(_should_gray_objects, "invariant"); 3230 } 3231 3232 } 3233 3234 void ConcurrentMark::clear_marking_state(bool clear_overflow) { 3235 _markStack.setEmpty(); 3236 _markStack.clear_overflow(); 3237 _regionStack.setEmpty(); 3238 _regionStack.clear_overflow(); 3239 if (clear_overflow) { 3240 clear_has_overflown(); 3241 } else { 3242 assert(has_overflown(), "pre-condition"); 3243 } 3244 _finger = _heap_start; 3245 3246 for (int i = 0; i < (int)_max_task_num; ++i) { 3247 OopTaskQueue* queue = _task_queues->queue(i); 3248 queue->set_empty(); 3249 // Clear any partial regions from the CMTasks 3250 _tasks[i]->clear_aborted_region(); 3251 } 3252 } 3253 3254 // Aggregate the counting data that was constructed concurrently 3255 // with marking.
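// For reference, all of the counting code maps a heap address to an index
// into a card bitmap with the same arithmetic (restated here as a sketch):
//
//   intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
//   BitMap::idx_t idx = card_num - _heap_bottom_card_num;
//
// i.e. the card number of the heap's bottom address anchors index 0 of
// every card bitmap (the global one and the per-task ones).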
3256 class AggregateCountDataHRClosure: public HeapRegionClosure { 3257 ConcurrentMark* _cm; 3258 BitMap* _cm_card_bm; 3259 intptr_t _bottom_card_num; 3260 size_t _max_task_num; 3261 3262 public: 3263 AggregateCountDataHRClosure(ConcurrentMark *cm, 3264 BitMap* cm_card_bm, 3265 intptr_t bottom_card_num, 3266 size_t max_task_num) : 3267 _cm(cm), 3268 _cm_card_bm(cm_card_bm), 3269 _bottom_card_num(bottom_card_num), 3270 _max_task_num(max_task_num) 3271 { } 3272 3273 bool is_card_aligned(HeapWord* p) { 3274 return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0); 3275 } 3276 3277 bool doHeapRegion(HeapRegion* hr) { 3278 if (hr->continuesHumongous()) { 3279 // We will ignore these here and process them when their 3280 // associated "starts humongous" region is processed. 3281 // Note that we cannot rely on their associated 3282 // "starts humongous" region to have their bit set to 1 3283 // since, due to the region chunking in the parallel region 3284 // iteration, a "continues humongous" region might be visited 3285 // before its associated "starts humongous". 3286 return false; 3287 } 3288 3289 HeapWord* start = hr->bottom(); 3290 HeapWord* limit = hr->next_top_at_mark_start(); 3291 HeapWord* end = hr->end(); 3292 3293 assert(start <= limit && limit <= hr->top() && 3294 hr->top() <= hr->end(), "Preconditions"); 3295 3296 assert(hr->next_marked_bytes() == 0, "Precondition"); 3297 3298 if (start == limit) { 3299 // NTAMS of this region has not been set so nothing to do. 3300 return false; 3301 } 3302 3303 intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); 3304 intptr_t limit_card_num = intptr_t(uintptr_t(limit) >> CardTableModRefBS::card_shift); 3305 intptr_t end_card_num = intptr_t(uintptr_t(end) >> CardTableModRefBS::card_shift); 3306 3307 assert(is_card_aligned(start), "sanity"); 3308 assert(is_card_aligned(end), "sanity"); 3309 3310 // If ntams is not card aligned then we bump the index for 3311 // limit so that we get the card spanning ntams. 3312 if (!is_card_aligned(limit)) { 3313 limit_card_num += 1; 3314 } 3315 3316 assert(limit_card_num <= end_card_num, "or else use atomics"); 3317 3318 BitMap::idx_t start_idx = start_card_num - _bottom_card_num; 3319 BitMap::idx_t limit_idx = limit_card_num - _bottom_card_num; 3320 3321 // Aggregate the "stripe" in the count data associated with hr. 3322 size_t hrs_index = hr->hrs_index(); 3323 size_t marked_bytes = 0; 3324 3325 for (int i = 0; (size_t)i < _max_task_num; i += 1) { 3326 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 3327 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 3328 3329 // Fetch the marked_bytes in this region for task i and 3330 // add it to the running total for this region. 3331 marked_bytes += marked_bytes_array[hrs_index]; 3332 3333 // Now clear the value in the task's marked bytes array 3334 // for this region. 3335 marked_bytes_array[hrs_index] = 0; 3336 3337 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx) 3338 // into the global card bitmap. 3339 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3340 3341 while (scan_idx < limit_idx) { 3342 assert(task_card_bm->at(scan_idx) == true, "should be"); 3343 _cm_card_bm->set_bit(scan_idx); 3344 task_card_bm->clear_bit(scan_idx); 3345 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3346 scan_idx = task_card_bm->get_next_one_offset(scan_idx + 1, limit_idx); 3347 } 3348 } 3349 3350 // Update the marked bytes for this region.
3351 hr->add_to_marked_bytes(marked_bytes); 3352 3353 // Now set the top at count to NTAMS. 3354 hr->set_top_at_conc_mark_count(limit); 3355 3356 // Next heap region 3357 return false; 3358 } 3359 }; 3360 3361 class G1AggregateCountDataTask: public AbstractGangTask { 3362 protected: 3363 G1CollectedHeap* _g1h; 3364 ConcurrentMark* _cm; 3365 BitMap* _cm_card_bm; 3366 intptr_t _heap_bottom_card_num; 3367 size_t _max_task_num; 3368 int _active_workers; 3369 3370 public: 3371 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3372 ConcurrentMark* cm, 3373 BitMap* cm_card_bm, 3374 intptr_t bottom_card_num, 3375 size_t max_task_num, 3376 int n_workers) : 3377 AbstractGangTask("Count Aggregation"), 3378 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3379 _heap_bottom_card_num(bottom_card_num), 3380 _max_task_num(max_task_num), 3381 _active_workers(n_workers) 3382 { } 3383 3384 void work(int worker_i) { 3385 AggregateCountDataHRClosure cl(_cm, _cm_card_bm, 3386 _heap_bottom_card_num, _max_task_num); 3387 3388 if (G1CollectedHeap::use_parallel_gc_threads()) { 3389 _g1h->heap_region_par_iterate_chunked(&cl, worker_i, 3390 _active_workers, 3391 HeapRegion::AggregateCountClaimValue); 3392 } else { 3393 _g1h->heap_region_iterate(&cl); 3394 } 3395 } 3396 }; 3397 3398 3399 void ConcurrentMark::aggregate_and_clear_count_data() { 3400 // Clear the global card bitmap 3401 _card_bm.clear(); 3402 3403 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3404 _g1h->workers()->active_workers() : 3405 1); 3406 3407 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3408 _heap_bottom_card_num, _max_task_num, 3409 n_workers); 3410 3411 if (G1CollectedHeap::use_parallel_gc_threads()) { 3412 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3413 "sanity check"); 3414 _g1h->set_par_threads(n_workers); 3415 _g1h->workers()->run_task(&g1_par_agg_task); 3416 _g1h->set_par_threads(0); 3417 3418 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3419 "sanity check"); 3420 _g1h->reset_heap_region_claim_values(); 3421 } else { 3422 g1_par_agg_task.work(0); 3423 } 3424 } 3425 3426 // Clear the per-worker arrays used to store the per-region counting data 3427 void ConcurrentMark::clear_all_count_data() { 3428 assert(SafepointSynchronize::is_at_safepoint() || 3429 !Universe::is_fully_initialized(), "must be"); 3430 3431 size_t max_regions = _g1h->max_regions(); 3432 3433 assert(_max_task_num != 0, "uninitialized"); 3434 assert(_count_card_bitmaps != NULL, "uninitialized"); 3435 assert(_count_marked_bytes != NULL, "uninitialized"); 3436 3437 for (int i = 0; (size_t) i < _max_task_num; i += 1) { 3438 BitMap* task_card_bm = count_card_bitmap_for(i); 3439 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3440 3441 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3442 assert(marked_bytes_array != NULL, "uninitialized"); 3443 3444 for (int j = 0; (size_t) j < max_regions; j++) { 3445 marked_bytes_array[j] = 0; 3446 } 3447 task_card_bm->clear(); 3448 } 3449 } 3450 3451 void ConcurrentMark::clear_count_data_for_heap_region(HeapRegion* hr) { 3452 // Clears the count data for the given region from _all_ of 3453 // the per-task counting data structures.
3454 3455 MemRegion used_region = hr->used_region(); 3456 HeapWord* start = used_region.start(); 3457 HeapWord* last = used_region.last(); 3458 size_t hr_index = hr->hrs_index(); 3459 3460 intptr_t start_card_num = 3461 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); 3462 intptr_t last_card_num = 3463 intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift); 3464 3465 BitMap::idx_t start_idx = start_card_num - heap_bottom_card_num(); 3466 BitMap::idx_t last_idx = last_card_num - heap_bottom_card_num(); 3467 3468 size_t used_region_bytes = used_region.byte_size(); 3469 size_t marked_bytes = 0; 3470 3471 for (int i=0; (size_t)i < _max_task_num; i += 1) { 3472 BitMap* task_card_bm = count_card_bitmap_for(i); 3473 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3474 3475 marked_bytes += marked_bytes_array[hr_index]; 3476 // clear the amount of marked bytes in the task array for this 3477 // region 3478 marked_bytes_array[hr_index] = 0; 3479 3480 // Clear the inclusive range [start_idx, last_idx] from the 3481 // card bitmap. The clear_range routine is exclusive so we 3482 // need to also explicitly clear the bit at last_idx. 3483 // Passing last_idx+1 to clear_range would work in 3484 // most cases, but could trip an OOB assertion. 3485 3486 if ((last_idx - start_idx) > 0) { 3487 task_card_bm->clear_range(start_idx, last_idx); 3488 } 3489 task_card_bm->clear_bit(last_idx); 3490 } 3491 } 3492 3493 void ConcurrentMark::print_stats() { 3494 if (verbose_stats()) { 3495 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3496 for (size_t i = 0; i < _active_tasks; ++i) { 3497 _tasks[i]->print_stats(); 3498 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3499 } 3500 } 3501 } 3502 3503 // Closures used by ConcurrentMark::complete_marking_in_collection_set(). 3504 3505 class CSetMarkOopClosure: public OopClosure { 3506 friend class CSetMarkBitMapClosure; 3507 3508 G1CollectedHeap* _g1h; 3509 ConcurrentMark* _cm; 3510 oop* _ms; 3511 jint* _array_ind_stack; 3512 int _ms_size; 3513 int _ms_ind; 3514 int _array_increment; 3515 int _worker_i; 3516 3517 bool push(oop obj, int arr_ind = 0) { 3518 if (_ms_ind == _ms_size) { 3519 gclog_or_tty->print_cr("Mark stack is full."); 3520 return false; 3521 } 3522 _ms[_ms_ind] = obj; 3523 if (obj->is_objArray()) { 3524 _array_ind_stack[_ms_ind] = arr_ind; 3525 } 3526 _ms_ind++; 3527 return true; 3528 } 3529 3530 oop pop() { 3531 if (_ms_ind == 0) { 3532 return NULL; 3533 } else { 3534 _ms_ind--; 3535 return _ms[_ms_ind]; 3536 } 3537 } 3538 3539 template <class T> bool drain() { 3540 while (_ms_ind > 0) { 3541 oop obj = pop(); 3542 assert(obj != NULL, "Since index was non-zero."); 3543 if (obj->is_objArray()) { 3544 jint arr_ind = _array_ind_stack[_ms_ind]; 3545 objArrayOop aobj = objArrayOop(obj); 3546 jint len = aobj->length(); 3547 jint next_arr_ind = arr_ind + _array_increment; 3548 if (next_arr_ind < len) { 3549 push(obj, next_arr_ind); 3550 } 3551 // Now process this portion of the array.
3552 int lim = MIN2(next_arr_ind, len); 3553 for (int j = arr_ind; j < lim; j++) { 3554 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j)); 3555 } 3556 } else { 3557 obj->oop_iterate(this); 3558 } 3559 if (abort()) return false; 3560 } 3561 return true; 3562 } 3563 3564 public: 3565 CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) : 3566 _g1h(G1CollectedHeap::heap()), 3567 _cm(cm), 3568 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), 3569 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), 3570 _ms_size(ms_size), _ms_ind(0), 3571 _array_increment(MAX2(ms_size/8, 16)), 3572 _worker_i(worker_i) { } 3573 3574 ~CSetMarkOopClosure() { 3575 FREE_C_HEAP_ARRAY(oop, _ms); 3576 FREE_C_HEAP_ARRAY(jint, _array_ind_stack); 3577 } 3578 3579 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 3580 virtual void do_oop( oop* p) { do_oop_work(p); } 3581 3582 template <class T> void do_oop_work(T* p) { 3583 T heap_oop = oopDesc::load_heap_oop(p); 3584 if (oopDesc::is_null(heap_oop)) return; 3585 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 3586 if (obj->is_forwarded()) { 3587 // If the object has already been forwarded, we have to make sure 3588 // that it's marked. So follow the forwarding pointer. Note that 3589 // this does the right thing for self-forwarding pointers in the 3590 // evacuation failure case. 3591 obj = obj->forwardee(); 3592 } 3593 HeapRegion* hr = _g1h->heap_region_containing(obj); 3594 if (hr != NULL) { 3595 if (hr->in_collection_set()) { 3596 if (_g1h->is_obj_ill(obj)) { 3597 if (_cm->par_mark_and_count(obj, hr, _worker_i)) { 3598 if (!push(obj)) { 3599 gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed."); 3600 set_abort(); 3601 } 3602 } 3603 } 3604 } else { 3605 // Outside the collection set; we need to gray it 3606 _cm->deal_with_reference(obj, _worker_i); 3607 } 3608 } 3609 } 3610 }; 3611 3612 class CSetMarkBitMapClosure: public BitMapClosure { 3613 G1CollectedHeap* _g1h; 3614 CMBitMap* _bitMap; 3615 ConcurrentMark* _cm; 3616 CSetMarkOopClosure _oop_cl; 3617 int _worker_i; 3618 3619 public: 3620 CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) : 3621 _g1h(G1CollectedHeap::heap()), 3622 _bitMap(cm->nextMarkBitMap()), 3623 _oop_cl(cm, ms_size, worker_i), 3624 _worker_i(worker_i) { } 3625 3626 bool do_bit(size_t offset) { 3627 // convert offset into a HeapWord* 3628 HeapWord* addr = _bitMap->offsetToHeapWord(offset); 3629 assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(), 3630 "address out of range"); 3631 assert(_bitMap->isMarked(addr), "tautology"); 3632 oop obj = oop(addr); 3633 if (!obj->is_forwarded()) { 3634 if (!_oop_cl.push(obj)) return false; 3635 if (UseCompressedOops) { 3636 if (!_oop_cl.drain<narrowOop>()) return false; 3637 } else { 3638 if (!_oop_cl.drain<oop>()) return false; 3639 } 3640 } 3641 // Otherwise... 3642 return true; 3643 } 3644 }; 3645 3646 class CompleteMarkingInCSetHRClosure: public HeapRegionClosure { 3647 CMBitMap* _bm; 3648 CSetMarkBitMapClosure _bit_cl; 3649 int _worker_i; 3650 3651 enum SomePrivateConstants { 3652 MSSize = 1000 3653 }; 3654 3655 public: 3656 CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) : 3657 _bm(cm->nextMarkBitMap()), 3658 _bit_cl(cm, MSSize, worker_i), 3659 _worker_i(worker_i) { } 3660 3661 bool doHeapRegion(HeapRegion* hr) { 3662 if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) { 3663 // The current worker has successfully claimed the region.
3664 if (!hr->evacuation_failed()) { 3665 MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start()); 3666 if (!mr.is_empty()) { 3667 bool done = false; 3668 while (!done) { 3669 done = _bm->iterate(&_bit_cl, mr); 3670 } 3671 } 3672 } 3673 } 3674 return false; 3675 } 3676 }; 3677 3678 class SetClaimValuesInCSetHRClosure: public HeapRegionClosure { 3679 jint _claim_value; 3680 3681 public: 3682 SetClaimValuesInCSetHRClosure(jint claim_value) : 3683 _claim_value(claim_value) { } 3684 3685 bool doHeapRegion(HeapRegion* hr) { 3686 hr->set_claim_value(_claim_value); 3687 return false; 3688 } 3689 }; 3690 3691 class G1ParCompleteMarkInCSetTask: public AbstractGangTask { 3692 protected: 3693 G1CollectedHeap* _g1h; 3694 ConcurrentMark* _cm; 3695 3696 public: 3697 G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h, 3698 ConcurrentMark* cm) : 3699 AbstractGangTask("Complete Mark in CSet"), 3700 _g1h(g1h), _cm(cm) { } 3701 3702 void work(int worker_i) { 3703 CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i); 3704 HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i); 3705 _g1h->collection_set_iterate_from(hr, &cmplt); 3706 } 3707 }; 3708 3709 void ConcurrentMark::complete_marking_in_collection_set() { 3710 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 3711 3712 if (!g1h->mark_in_progress()) { 3713 g1h->g1_policy()->record_mark_closure_time(0.0); 3714 return; 3715 } 3716 3717 double start = os::elapsedTime(); 3718 G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this); 3719 3720 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity"); 3721 3722 if (G1CollectedHeap::use_parallel_gc_threads()) { 3723 int n_workers = g1h->workers()->active_workers(); 3724 g1h->set_par_threads(n_workers); 3725 g1h->workers()->run_task(&complete_mark_task); 3726 g1h->set_par_threads(0); 3727 } else { 3728 complete_mark_task.work(0); 3729 } 3730 3731 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity"); 3732 3733 // Now reset the claim values in the regions in the collection set. 3734 SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue); 3735 g1h->collection_set_iterate(&set_cv_cl); 3736 3737 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity"); 3738 3739 double end_time = os::elapsedTime(); 3740 double elapsed_time_ms = (end_time - start) * 1000.0; 3741 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms); 3742 } 3743 3744 // The next two methods deal with the following optimisation. Some 3745 // objects are gray by being marked and located above the finger. If, 3746 // during an evacuation pause, they are copied below the finger, then 3747 // they need to be pushed on the stack. The observation is that, if 3748 // there are no regions in the collection set located above the 3749 // finger, then the above cannot happen, hence we do not need to 3750 // explicitly gray any objects when copying them to below the 3751 // finger. The global stack will be scanned to ensure that, if it 3752 // points to objects being copied, it will update their 3753 // location. There is a tricky situation with the gray objects in the 3754 // region stack that are being copied, however. See the comment in 3755 // newCSet().
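// In sketch form (illustrative only, not code that is compiled here),
// the two methods below conspire to compute:
//
//   _should_gray_objects =
//       (region stack or aborted regions are non-empty) ||
//       (exists a cset region r with r.end() > MIN(global finger,
//                                                  all task fingers))
//
// newCSet() computes the minimum finger, and registerCSetRegion()
// raises the flag for any collection set region that extends above it.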
3756 3757 void ConcurrentMark::newCSet() { 3758 if (!concurrent_marking_in_progress()) { 3759 // nothing to do if marking is not in progress 3760 return; 3761 } 3762 3763 // find what the lowest finger is among the global and local fingers 3764 _min_finger = _finger; 3765 for (int i = 0; i < (int)_max_task_num; ++i) { 3766 CMTask* task = _tasks[i]; 3767 HeapWord* task_finger = task->finger(); 3768 if (task_finger != NULL && task_finger < _min_finger) { 3769 _min_finger = task_finger; 3770 } 3771 } 3772 3773 _should_gray_objects = false; 3774 3775 // This fixes a very subtle and frustrating bug. It might be the case 3776 // that, during an evacuation pause, heap regions that contain 3777 // objects that are gray (by being in regions contained in the 3778 // region stack) are included in the collection set. Since such gray 3779 // objects will be moved, and because it's not easy to redirect 3780 // region stack entries to point to a new location (because objects 3781 // in one region might be scattered to multiple regions after they 3782 // are copied), one option is to ensure that all marked objects 3783 // copied during a pause are pushed on the stack. Notice, however, 3784 // that this problem can only happen when the region stack is not 3785 // empty during an evacuation pause. So, we make the fix a bit less 3786 // conservative and ensure that regions are pushed on the stack, 3787 // irrespective of whether all collection set regions are below the 3788 // finger, if the region stack is not empty. This is expected to be 3789 // a rare case, so I don't think it's necessary to be smarter about it. 3790 if (!region_stack_empty() || has_aborted_regions()) { 3791 _should_gray_objects = true; 3792 } 3793 } 3794 3795 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) { 3796 if (!concurrent_marking_in_progress()) return; 3797 3798 HeapWord* region_end = hr->end(); 3799 if (region_end > _min_finger) { 3800 _should_gray_objects = true; 3801 } 3802 } 3803 3804 // Resets the region fields of active CMTasks whose values point 3805 // into the collection set. 3806 void ConcurrentMark::reset_active_task_region_fields_in_cset() { 3807 assert(SafepointSynchronize::is_at_safepoint(), "should be in STW"); 3808 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 3809 3810 for (int i = 0; i < (int)parallel_marking_threads(); i += 1) { 3811 CMTask* task = _tasks[i]; 3812 HeapWord* task_finger = task->finger(); 3813 if (task_finger != NULL) { 3814 assert(_g1h->is_in_g1_reserved(task_finger), "not in heap"); 3815 HeapRegion* finger_region = _g1h->heap_region_containing(task_finger); 3816 if (finger_region->in_collection_set()) { 3817 // The task's current region is in the collection set. 3818 // This region will be evacuated in the current GC and 3819 // the region fields in the task will be stale. 3820 task->giveup_current_region(); 3821 } 3822 } 3823 } 3824 } 3825 3826 // abandon current marking iteration due to a Full GC 3827 void ConcurrentMark::abort() { 3828 // Clear all marks to force marking thread to do nothing 3829 _nextMarkBitMap->clearAll(); 3830 // Empty mark stack 3831 clear_marking_state(); 3832 for (int i = 0; i < (int)_max_task_num; ++i) { 3833 _tasks[i]->clear_region_fields(); 3834 } 3835 _has_aborted = true; 3836 3837 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3838 satb_mq_set.abandon_partial_marking(); 3839 // This can be called either during or outside marking, we'll read 3840 // the expected_active value from the SATB queue set.
3841 satb_mq_set.set_active_all_threads( 3842 false, /* new active value */ 3843 satb_mq_set.is_active() /* expected_active */); 3844 } 3845 3846 static void print_ms_time_info(const char* prefix, const char* name, 3847 NumberSeq& ns) { 3848 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3849 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3850 if (ns.num() > 0) { 3851 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3852 prefix, ns.sd(), ns.maximum()); 3853 } 3854 } 3855 3856 void ConcurrentMark::print_summary_info() { 3857 gclog_or_tty->print_cr(" Concurrent marking:"); 3858 print_ms_time_info(" ", "init marks", _init_times); 3859 print_ms_time_info(" ", "remarks", _remark_times); 3860 { 3861 print_ms_time_info(" ", "final marks", _remark_mark_times); 3862 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3863 3864 } 3865 print_ms_time_info(" ", "cleanups", _cleanup_times); 3866 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3867 _total_counting_time, 3868 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3869 (double)_cleanup_times.num() 3870 : 0.0)); 3871 if (G1ScrubRemSets) { 3872 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3873 _total_rs_scrub_time, 3874 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3875 (double)_cleanup_times.num() 3876 : 0.0)); 3877 } 3878 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3879 (_init_times.sum() + _remark_times.sum() + 3880 _cleanup_times.sum())/1000.0); 3881 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3882 "(%8.2f s marking).", 3883 cmThread()->vtime_accum(), 3884 cmThread()->vtime_mark_accum()); 3885 } 3886 3887 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3888 _parallel_workers->print_worker_threads_on(st); 3889 } 3890 3891 // Closures 3892 // XXX: there seems to be a lot of code duplication here; 3893 // should refactor and consolidate the shared code. 3894 3895 3896 3897 3898 // We take a break if someone is trying to stop the world.
3899 bool ConcurrentMark::do_yield_check(int worker_i) { 3900 if (should_yield()) { 3901 if (worker_i == 0) { 3902 _g1h->g1_policy()->record_concurrent_pause(); 3903 } 3904 cmThread()->yield(); 3905 if (worker_i == 0) { 3906 _g1h->g1_policy()->record_concurrent_pause_end(); 3907 } 3908 return true; 3909 } else { 3910 return false; 3911 } 3912 } 3913 3914 bool ConcurrentMark::should_yield() { 3915 return cmThread()->should_yield(); 3916 } 3917 3918 bool ConcurrentMark::containing_card_is_marked(void* p) { 3919 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3920 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3921 } 3922 3923 bool ConcurrentMark::containing_cards_are_marked(void* start, 3924 void* last) { 3925 return containing_card_is_marked(start) && 3926 containing_card_is_marked(last); 3927 } 3928 3929 #ifndef PRODUCT 3930 // for debugging purposes 3931 void ConcurrentMark::print_finger() { 3932 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3933 _heap_start, _heap_end, _finger); 3934 for (int i = 0; i < (int) _max_task_num; ++i) { 3935 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3936 } 3937 gclog_or_tty->print_cr(""); 3938 } 3939 #endif 3940 3941 void CMTask::scan_object(oop obj) { 3942 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3943 3944 if (_cm->verbose_high()) { 3945 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3946 _task_id, (void*) obj); 3947 } 3948 3949 size_t obj_size = obj->size(); 3950 _words_scanned += obj_size; 3951 3952 obj->oop_iterate(_cm_oop_closure); 3953 statsOnly( ++_objs_scanned ); 3954 check_limits(); 3955 } 3956 3957 // Closure for iteration over bitmaps 3958 class CMBitMapClosure : public BitMapClosure { 3959 private: 3960 // the bitmap that is being iterated over 3961 CMBitMap* _nextMarkBitMap; 3962 ConcurrentMark* _cm; 3963 CMTask* _task; 3964 // true if we're scanning a heap region claimed by the task (so that 3965 // we move the finger along), false if we're not, i.e. currently when 3966 // scanning a heap region popped from the region stack (so that we 3967 // do not move the task finger along; it'd be a mistake if we did so). 3968 bool _scanning_heap_region; 3969 3970 public: 3971 CMBitMapClosure(CMTask *task, 3972 ConcurrentMark* cm, 3973 CMBitMap* nextMarkBitMap) 3974 : _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { } 3975 3976 void set_scanning_heap_region(bool scanning_heap_region) { 3977 _scanning_heap_region = scanning_heap_region; 3978 } 3979 3980 bool do_bit(size_t offset) { 3981 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3982 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3983 assert( addr < _cm->finger(), "invariant"); 3984 3985 if (_scanning_heap_region) { 3986 statsOnly( _task->increase_objs_found_on_bitmap() ); 3987 assert(addr >= _task->finger(), "invariant"); 3988 // We move the task's local finger along. 3989 _task->move_finger_to(addr); 3990 } else { 3991 // We move the task's region finger along. 3992 _task->move_region_finger_to(addr); 3993 } 3994 3995 _task->scan_object(oop(addr)); 3996 // we only partially drain the local queue and global stack 3997 _task->drain_local_queue(true); 3998 _task->drain_global_stack(true); 3999 4000 // if the has_aborted flag has been raised, we need to bail out of 4001 // the iteration 4002 return !_task->has_aborted(); 4003 } 4004 }; 4005 4006 // Closure for iterating over objects, currently only used for 4007 // processing SATB buffers.
4008 class CMObjectClosure : public ObjectClosure { 4009 private: 4010 CMTask* _task; 4011 4012 public: 4013 void do_object(oop obj) { 4014 _task->deal_with_reference(obj); 4015 } 4016 4017 CMObjectClosure(CMTask* task) : _task(task) { } 4018 }; 4019 4020 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 4021 ConcurrentMark* cm, 4022 CMTask* task) 4023 : _g1h(g1h), _cm(cm), _task(task) { 4024 assert(_ref_processor == NULL, "should be initialized to NULL"); 4025 4026 if (G1UseConcMarkReferenceProcessing) { 4027 _ref_processor = g1h->ref_processor_cm(); 4028 assert(_ref_processor != NULL, "should not be NULL"); 4029 } 4030 } 4031 4032 void CMTask::setup_for_region(HeapRegion* hr) { 4033 // Separated the asserts so that we know which one fires. 4034 assert(hr != NULL, 4035 "claim_region() should have filtered out continues humongous regions"); 4036 assert(!hr->continuesHumongous(), 4037 "claim_region() should have filtered out continues humongous regions"); 4038 4039 if (_cm->verbose_low()) { 4040 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 4041 _task_id, hr); 4042 } 4043 4044 _curr_region = hr; 4045 _finger = hr->bottom(); 4046 update_region_limit(); 4047 } 4048 4049 void CMTask::update_region_limit() { 4050 HeapRegion* hr = _curr_region; 4051 HeapWord* bottom = hr->bottom(); 4052 HeapWord* limit = hr->next_top_at_mark_start(); 4053 4054 if (limit == bottom) { 4055 if (_cm->verbose_low()) { 4056 gclog_or_tty->print_cr("[%d] found an empty region " 4057 "["PTR_FORMAT", "PTR_FORMAT")", 4058 _task_id, bottom, limit); 4059 } 4060 // The region was collected underneath our feet. 4061 // We set the finger to bottom to ensure that the bitmap 4062 // iteration that will follow this will not do anything. 4063 // (this is not a condition that holds when we set the region up, 4064 // as the region is not supposed to be empty in the first place) 4065 _finger = bottom; 4066 } else if (limit >= _region_limit) { 4067 assert(limit >= _finger, "peace of mind"); 4068 } else { 4069 assert(limit < _region_limit, "only way to get here"); 4070 // This can happen under some pretty unusual circumstances. An 4071 // evacuation pause empties the region underneath our feet (NTAMS 4072 // at bottom). We then do some allocation in the region (NTAMS 4073 // stays at bottom), followed by the region being used as a GC 4074 // alloc region (NTAMS will move to top() and the objects 4075 // originally below it will be grayed). All objects now marked in 4076 // the region are explicitly grayed, if below the global finger, 4077 // and in fact we do not need to scan anything else. So, we simply 4078 // set _finger to be limit to ensure that the bitmap iteration 4079 // doesn't do anything. 4080 _finger = limit; 4081 } 4082 4083 _region_limit = limit; 4084 } 4085 4086 void CMTask::giveup_current_region() { 4087 assert(_curr_region != NULL, "invariant"); 4088 if (_cm->verbose_low()) { 4089 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 4090 _task_id, _curr_region); 4091 } 4092 clear_region_fields(); 4093 } 4094 4095 void CMTask::clear_region_fields() { 4096 // Values for these three fields that indicate that we're not 4097 // holding on to a region.
4098 _curr_region = NULL; 4099 _finger = NULL; 4100 _region_limit = NULL; 4101 4102 _region_finger = NULL; 4103 } 4104 4105 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 4106 if (cm_oop_closure == NULL) { 4107 assert(_cm_oop_closure != NULL, "invariant"); 4108 } else { 4109 assert(_cm_oop_closure == NULL, "invariant"); 4110 } 4111 _cm_oop_closure = cm_oop_closure; 4112 } 4113 4114 void CMTask::reset(CMBitMap* nextMarkBitMap) { 4115 guarantee(nextMarkBitMap != NULL, "invariant"); 4116 4117 if (_cm->verbose_low()) { 4118 gclog_or_tty->print_cr("[%d] resetting", _task_id); 4119 } 4120 4121 _nextMarkBitMap = nextMarkBitMap; 4122 clear_region_fields(); 4123 assert(_aborted_region.is_empty(), "should have been cleared"); 4124 4125 _calls = 0; 4126 _elapsed_time_ms = 0.0; 4127 _termination_time_ms = 0.0; 4128 _termination_start_time_ms = 0.0; 4129 4130 #if _MARKING_STATS_ 4131 _local_pushes = 0; 4132 _local_pops = 0; 4133 _local_max_size = 0; 4134 _objs_scanned = 0; 4135 _global_pushes = 0; 4136 _global_pops = 0; 4137 _global_max_size = 0; 4138 _global_transfers_to = 0; 4139 _global_transfers_from = 0; 4140 _region_stack_pops = 0; 4141 _regions_claimed = 0; 4142 _objs_found_on_bitmap = 0; 4143 _satb_buffers_processed = 0; 4144 _steal_attempts = 0; 4145 _steals = 0; 4146 _aborted = 0; 4147 _aborted_overflow = 0; 4148 _aborted_cm_aborted = 0; 4149 _aborted_yield = 0; 4150 _aborted_timed_out = 0; 4151 _aborted_satb = 0; 4152 _aborted_termination = 0; 4153 #endif // _MARKING_STATS_ 4154 } 4155 4156 bool CMTask::should_exit_termination() { 4157 regular_clock_call(); 4158 // This is called when we are in the termination protocol. We should 4159 // quit if, for some reason, this task wants to abort or the global 4160 // stack is not empty (this means that we can get work from it). 4161 return !_cm->mark_stack_empty() || has_aborted(); 4162 } 4163 4164 void CMTask::reached_limit() { 4165 assert(_words_scanned >= _words_scanned_limit || 4166 _refs_reached >= _refs_reached_limit , 4167 "shouldn't have been called otherwise"); 4168 regular_clock_call(); 4169 } 4170 4171 void CMTask::regular_clock_call() { 4172 if (has_aborted()) return; 4173 4174 // First, we need to recalculate the words scanned and refs reached 4175 // limits for the next clock call. 4176 recalculate_limits(); 4177 4178 // During the regular clock call we do the following 4179 4180 // (1) If an overflow has been flagged, then we abort. 4181 if (_cm->has_overflown()) { 4182 set_has_aborted(); 4183 return; 4184 } 4185 4186 // If we are not concurrent (i.e. we're doing remark) we don't need 4187 // to check anything else. The other steps are only needed during 4188 // the concurrent marking phase. 4189 if (!concurrent()) return; 4190 4191 // (2) If marking has been aborted for Full GC, then we also abort. 4192 if (_cm->has_aborted()) { 4193 set_has_aborted(); 4194 statsOnly( ++_aborted_cm_aborted ); 4195 return; 4196 } 4197 4198 double curr_time_ms = os::elapsedVTime() * 1000.0; 4199 4200 // (3) If marking stats are enabled, then we update the step history. 
4201 #if _MARKING_STATS_ 4202 if (_words_scanned >= _words_scanned_limit) { 4203 ++_clock_due_to_scanning; 4204 } 4205 if (_refs_reached >= _refs_reached_limit) { 4206 ++_clock_due_to_marking; 4207 } 4208 4209 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 4210 _interval_start_time_ms = curr_time_ms; 4211 _all_clock_intervals_ms.add(last_interval_ms); 4212 4213 if (_cm->verbose_medium()) { 4214 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 4215 "scanned = %d%s, refs reached = %d%s", 4216 _task_id, last_interval_ms, 4217 _words_scanned, 4218 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 4219 _refs_reached, 4220 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 4221 } 4222 #endif // _MARKING_STATS_ 4223 4224 // (4) We check whether we should yield. If we have to, then we abort. 4225 if (_cm->should_yield()) { 4226 // We should yield. To do this we abort the task. The caller is 4227 // responsible for yielding. 4228 set_has_aborted(); 4229 statsOnly( ++_aborted_yield ); 4230 return; 4231 } 4232 4233 // (5) We check whether we've reached our time quota. If we have, 4234 // then we abort. 4235 double elapsed_time_ms = curr_time_ms - _start_time_ms; 4236 if (elapsed_time_ms > _time_target_ms) { 4237 set_has_aborted(); 4238 _has_timed_out = true; 4239 statsOnly( ++_aborted_timed_out ); 4240 return; 4241 } 4242 4243 // (6) Finally, we check whether there are enough completed SATB 4244 // buffers available for processing. If there are, we abort. 4245 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 4246 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 4247 if (_cm->verbose_low()) { 4248 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 4249 _task_id); 4250 } 4251 // we do need to process SATB buffers, we'll abort and restart 4252 // the marking task to do so 4253 set_has_aborted(); 4254 statsOnly( ++_aborted_satb ); 4255 return; 4256 } 4257 } 4258 4259 void CMTask::recalculate_limits() { 4260 _real_words_scanned_limit = _words_scanned + words_scanned_period; 4261 _words_scanned_limit = _real_words_scanned_limit; 4262 4263 _real_refs_reached_limit = _refs_reached + refs_reached_period; 4264 _refs_reached_limit = _real_refs_reached_limit; 4265 } 4266 4267 void CMTask::decrease_limits() { 4268 // This is called when we believe that we're going to do an infrequent 4269 // operation which will increase the per-byte scanned cost (i.e. move 4270 // entries to/from the global stack). It basically tries to decrease the 4271 // scanning limit so that the clock is called earlier.
4272 4273 if (_cm->verbose_medium()) { 4274 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 4275 } 4276 4277 _words_scanned_limit = _real_words_scanned_limit - 4278 3 * words_scanned_period / 4; 4279 _refs_reached_limit = _real_refs_reached_limit - 4280 3 * refs_reached_period / 4; 4281 } 4282 4283 void CMTask::move_entries_to_global_stack() { 4284 // local array where we'll store the entries that will be popped 4285 // from the local queue 4286 oop buffer[global_stack_transfer_size]; 4287 4288 int n = 0; 4289 oop obj; 4290 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 4291 buffer[n] = obj; 4292 ++n; 4293 } 4294 4295 if (n > 0) { 4296 // we popped at least one entry from the local queue 4297 4298 statsOnly( ++_global_transfers_to; _local_pops += n ); 4299 4300 if (!_cm->mark_stack_push(buffer, n)) { 4301 if (_cm->verbose_low()) { 4302 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", 4303 _task_id); 4304 } 4305 set_has_aborted(); 4306 } else { 4307 // the transfer was successful 4308 4309 if (_cm->verbose_medium()) { 4310 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 4311 _task_id, n); 4312 } 4313 statsOnly( int tmp_size = _cm->mark_stack_size(); 4314 if (tmp_size > _global_max_size) { 4315 _global_max_size = tmp_size; 4316 } 4317 _global_pushes += n ); 4318 } 4319 } 4320 4321 // this operation was quite expensive, so decrease the limits 4322 decrease_limits(); 4323 } 4324 4325 void CMTask::get_entries_from_global_stack() { 4326 // local array where we'll store the entries that will be popped 4327 // from the global stack. 4328 oop buffer[global_stack_transfer_size]; 4329 int n; 4330 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 4331 assert(n <= global_stack_transfer_size, 4332 "we should not pop more than the given limit"); 4333 if (n > 0) { 4334 // yes, we did actually pop at least one entry 4335 4336 statsOnly( ++_global_transfers_from; _global_pops += n ); 4337 if (_cm->verbose_medium()) { 4338 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 4339 _task_id, n); 4340 } 4341 for (int i = 0; i < n; ++i) { 4342 bool success = _task_queue->push(buffer[i]); 4343 // We only call this when the local queue is empty or under a 4344 // given target limit. So, we do not expect this push to fail. 4345 assert(success, "invariant"); 4346 } 4347 4348 statsOnly( int tmp_size = _task_queue->size(); 4349 if (tmp_size > _local_max_size) { 4350 _local_max_size = tmp_size; 4351 } 4352 _local_pushes += n ); 4353 } 4354 4355 // this operation was quite expensive, so decrease the limits 4356 decrease_limits(); 4357 } 4358 4359 void CMTask::drain_local_queue(bool partially) { 4360 if (has_aborted()) return; 4361 4362 // Decide what the target size is, depending on whether we're going to 4363 // drain it partially (so that other tasks can steal if they run out 4364 // of things to do) or totally (at the very end).
4365 size_t target_size; 4366 if (partially) { 4367 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 4368 } else { 4369 target_size = 0; 4370 } 4371 4372 if (_task_queue->size() > target_size) { 4373 if (_cm->verbose_high()) { 4374 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", 4375 _task_id, target_size); 4376 } 4377 4378 oop obj; 4379 bool ret = _task_queue->pop_local(obj); 4380 while (ret) { 4381 statsOnly( ++_local_pops ); 4382 4383 if (_cm->verbose_high()) { 4384 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 4385 (void*) obj); 4386 } 4387 4388 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 4389 assert(!_g1h->is_on_master_free_list( 4390 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 4391 4392 scan_object(obj); 4393 4394 if (_task_queue->size() <= target_size || has_aborted()) { 4395 ret = false; 4396 } else { 4397 ret = _task_queue->pop_local(obj); 4398 } 4399 } 4400 4401 if (_cm->verbose_high()) { 4402 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 4403 _task_id, _task_queue->size()); 4404 } 4405 } 4406 } 4407 4408 void CMTask::drain_global_stack(bool partially) { 4409 if (has_aborted()) return; 4410 4411 // We have a policy to drain the local queue before we attempt to 4412 // drain the global stack. 4413 assert(partially || _task_queue->size() == 0, "invariant"); 4414 4415 // Decide what the target size is, depending on whether we're going to 4416 // drain it partially (so that other tasks can steal if they run out 4417 // of things to do) or totally (at the very end). Notice that, 4418 // because we move entries from the global stack in chunks or 4419 // because another task might be doing the same, we might in fact 4420 // drop below the target. But, this is not a problem. 4421 size_t target_size; 4422 if (partially) { 4423 target_size = _cm->partial_mark_stack_size_target(); 4424 } else { 4425 target_size = 0; 4426 } 4427 4428 if (_cm->mark_stack_size() > target_size) { 4429 if (_cm->verbose_low()) { 4430 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", 4431 _task_id, target_size); 4432 } 4433 4434 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 4435 get_entries_from_global_stack(); 4436 drain_local_queue(partially); 4437 } 4438 4439 if (_cm->verbose_low()) { 4440 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 4441 _task_id, _cm->mark_stack_size()); 4442 } 4443 } 4444 } 4445 4446 // SATB Queue has several assumptions on whether to call the par or 4447 // non-par versions of the methods. This is why some of the code is 4448 // replicated. We should really get rid of the single-threaded version 4449 // of the code to simplify things. 4450 void CMTask::drain_satb_buffers() { 4451 if (has_aborted()) return; 4452 4453 // We set this so that the regular clock knows that we're in the 4454 // middle of draining buffers and doesn't set the abort flag when it 4455 // notices that SATB buffers are available for draining. It'd be 4456 // very counterproductive if it did that. :-) 4457 _draining_satb_buffers = true; 4458 4459 CMObjectClosure oc(this); 4460 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 4461 if (G1CollectedHeap::use_parallel_gc_threads()) { 4462 satb_mq_set.set_par_closure(_task_id, &oc); 4463 } else { 4464 satb_mq_set.set_closure(&oc); 4465 } 4466 4467 // This keeps claiming and applying the closure to completed buffers 4468 // until we run out of buffers or we need to abort.
4469 if (G1CollectedHeap::use_parallel_gc_threads()) { 4470 while (!has_aborted() && 4471 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 4472 if (_cm->verbose_medium()) { 4473 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 4474 } 4475 statsOnly( ++_satb_buffers_processed ); 4476 regular_clock_call(); 4477 } 4478 } else { 4479 while (!has_aborted() && 4480 satb_mq_set.apply_closure_to_completed_buffer()) { 4481 if (_cm->verbose_medium()) { 4482 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 4483 } 4484 statsOnly( ++_satb_buffers_processed ); 4485 regular_clock_call(); 4486 } 4487 } 4488 4489 if (!concurrent() && !has_aborted()) { 4490 // We should only do this during remark. 4491 if (G1CollectedHeap::use_parallel_gc_threads()) { 4492 satb_mq_set.par_iterate_closure_all_threads(_task_id); 4493 } else { 4494 satb_mq_set.iterate_closure_all_threads(); 4495 } 4496 } 4497 4498 _draining_satb_buffers = false; 4499 4500 assert(has_aborted() || 4501 concurrent() || 4502 satb_mq_set.completed_buffers_num() == 0, "invariant"); 4503 4504 if (G1CollectedHeap::use_parallel_gc_threads()) { 4505 satb_mq_set.set_par_closure(_task_id, NULL); 4506 } else { 4507 satb_mq_set.set_closure(NULL); 4508 } 4509 4510 // again, this was a potentially expensive operation, decrease the 4511 // limits to get the regular clock call early 4512 decrease_limits(); 4513 } 4514 4515 void CMTask::drain_region_stack(BitMapClosure* bc) { 4516 if (has_aborted()) return; 4517 4518 assert(_region_finger == NULL, 4519 "it should be NULL when we're not scanning a region"); 4520 4521 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) { 4522 if (_cm->verbose_low()) { 4523 gclog_or_tty->print_cr("[%d] draining region stack, size = %d", 4524 _task_id, _cm->region_stack_size()); 4525 } 4526 4527 MemRegion mr; 4528 4529 if (!_aborted_region.is_empty()) { 4530 mr = _aborted_region; 4531 _aborted_region = MemRegion(); 4532 4533 if (_cm->verbose_low()) { 4534 gclog_or_tty->print_cr("[%d] scanning aborted region " 4535 "[ " PTR_FORMAT ", " PTR_FORMAT " )", 4536 _task_id, mr.start(), mr.end()); 4537 } 4538 } else { 4539 mr = _cm->region_stack_pop_lock_free(); 4540 // it returns MemRegion() if the pop fails 4541 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 4542 } 4543 4544 while (mr.start() != NULL) { 4545 if (_cm->verbose_medium()) { 4546 gclog_or_tty->print_cr("[%d] we are scanning region " 4547 "["PTR_FORMAT", "PTR_FORMAT")", 4548 _task_id, mr.start(), mr.end()); 4549 } 4550 4551 assert(mr.end() <= _cm->finger(), 4552 "otherwise the region shouldn't be on the stack"); 4553 assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); 4554 if (_nextMarkBitMap->iterate(bc, mr)) { 4555 assert(!has_aborted(), 4556 "cannot abort the task without aborting the bitmap iteration"); 4557 4558 // We finished iterating over the region without aborting. 4559 regular_clock_call(); 4560 if (has_aborted()) { 4561 mr = MemRegion(); 4562 } else { 4563 mr = _cm->region_stack_pop_lock_free(); 4564 // it returns MemRegion() if the pop fails 4565 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 4566 } 4567 } else { 4568 assert(has_aborted(), "currently the only way to do so"); 4569 4570 // The only way to abort the bitmap iteration is to return 4571 // false from the do_bit() method. However, inside the 4572 // do_bit() method we move the _region_finger to point to the 4573 // object currently being looked at. 
So, if we bail out, we 4574 // have definitely set _region_finger to something non-null. 4575 assert(_region_finger != NULL, "invariant"); 4576 4577 // Make sure that any previously aborted region has been 4578 // cleared. 4579 assert(_aborted_region.is_empty(), "aborted region not cleared"); 4580 4581 // The iteration was actually aborted. So now _region_finger 4582 // points to the address of the object we last scanned. If we 4583 // leave it there, when we restart this task, we will rescan 4584 // the object. It is easy to avoid this. We move the finger by 4585 // enough to point to the next possible object header (the 4586 // bitmap knows by how much we need to move it as it knows its 4587 // granularity). 4588 MemRegion newRegion = 4589 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); 4590 4591 if (!newRegion.is_empty()) { 4592 if (_cm->verbose_low()) { 4593 gclog_or_tty->print_cr("[%d] recording unscanned region" 4594 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask", 4595 _task_id, 4596 newRegion.start(), newRegion.end()); 4597 } 4598 // Now record the part of the region we didn't scan to 4599 // make sure this task scans it later. 4600 _aborted_region = newRegion; 4601 } 4602 // break from while 4603 mr = MemRegion(); 4604 } 4605 _region_finger = NULL; 4606 } 4607 4608 if (_cm->verbose_low()) { 4609 gclog_or_tty->print_cr("[%d] drained region stack, size = %d", 4610 _task_id, _cm->region_stack_size()); 4611 } 4612 } 4613 } 4614 4615 void CMTask::print_stats() { 4616 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 4617 _task_id, _calls); 4618 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 4619 _elapsed_time_ms, _termination_time_ms); 4620 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4621 _step_times_ms.num(), _step_times_ms.avg(), 4622 _step_times_ms.sd()); 4623 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4624 _step_times_ms.maximum(), _step_times_ms.sum()); 4625 4626 #if _MARKING_STATS_ 4627 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4628 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 4629 _all_clock_intervals_ms.sd()); 4630 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4631 _all_clock_intervals_ms.maximum(), 4632 _all_clock_intervals_ms.sum()); 4633 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 4634 _clock_due_to_scanning, _clock_due_to_marking); 4635 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 4636 _objs_scanned, _objs_found_on_bitmap); 4637 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 4638 _local_pushes, _local_pops, _local_max_size); 4639 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 4640 _global_pushes, _global_pops, _global_max_size); 4641 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 4642 _global_transfers_to,_global_transfers_from); 4643 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", 4644 _regions_claimed, _region_stack_pops); 4645 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 4646 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 4647 _steal_attempts, _steals); 4648 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 4649 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 4650 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 4651 
gclog_or_tty->print_cr("  time out: %d, SATB: %d, termination: %d", 4652 _aborted_timed_out, _aborted_satb, _aborted_termination); 4653 #endif // _MARKING_STATS_ 4654 } 4655 4656 /***************************************************************************** 4657 4658 The do_marking_step(time_target_ms) method is the building block 4659 of the parallel marking framework. It can be called in parallel 4660 with other invocations of do_marking_step() on different tasks 4661 (but only one per task, obviously) and concurrently with the 4662 mutator threads, or during remark, hence it eliminates the need 4663 for two versions of the code. When called during remark, it will 4664 pick up from where the task left off during the concurrent marking 4665 phase. Interestingly, tasks are also claimable during evacuation 4666 pauses, since do_marking_step() ensures that it aborts before 4667 it needs to yield. 4668 4669 The data structures that it uses to do marking work are the 4670 following: 4671 4672 (1) Marking Bitmap. If there are gray objects that appear only 4673 on the bitmap (this happens either when dealing with an overflow 4674 or when the initial marking phase has simply marked the roots 4675 and didn't push them on the stack), then tasks claim heap 4676 regions whose bitmap they then scan to find gray objects. A 4677 global finger indicates where the end of the last claimed region 4678 is. A local finger indicates how far into the region a task has 4679 scanned. The two fingers are used to determine how to gray an 4680 object (i.e. whether simply marking it is OK, as it will be 4681 visited by a task in the future, or whether it needs to also be 4682 pushed on a stack). 4683 4684 (2) Local Queue. The task's local queue, which it accesses 4685 reasonably efficiently. Other tasks can steal from 4686 it when they run out of work. Throughout the marking phase, a 4687 task attempts to keep its local queue short but not totally 4688 empty, so that entries are available for stealing by other 4689 tasks. Only when there is no more work will a task totally 4690 drain its local queue. 4691 4692 (3) Global Mark Stack. This handles local queue overflow. During 4693 marking only sets of entries are moved between it and the local 4694 queues, as access to it requires a mutex, and more fine-grained 4695 interaction with it might cause contention. If it 4696 overflows, then the marking phase should restart and iterate 4697 over the bitmap to identify gray objects. Throughout the marking 4698 phase, tasks attempt to keep the global mark stack at a small 4699 length but not totally empty, so that entries are available for 4700 popping by other tasks. Only when there is no more work will tasks 4701 totally drain the global mark stack. 4702 4703 (4) Global Region Stack. Entries on it correspond to areas of 4704 the bitmap that need to be scanned since they contain gray 4705 objects. Pushes on the region stack only happen during 4706 evacuation pauses and typically correspond to areas covered by 4707 GC LABS. If it overflows, then the marking phase should restart 4708 and iterate over the bitmap to identify gray objects. Tasks will 4709 try to totally drain the region stack as soon as possible. 4710 4711 (5) SATB Buffer Queue. This is where completed SATB buffers are 4712 made available. Buffers are regularly removed from this queue 4713 and scanned for roots, so that the queue doesn't get too 4714 long.
During remark, all completed buffers are processed, as 4715 well as the filled in parts of any uncompleted buffers. 4716 4717 The do_marking_step() method tries to abort when the time target 4718 has been reached. There are a few other cases when the 4719 do_marking_step() method also aborts: 4720 4721 (1) When the marking phase has been aborted (after a Full GC). 4722 4723 (2) When a global overflow (either on the global stack or the 4724 region stack) has been triggered. Before the task aborts, it 4725 will actually sync up with the other tasks to ensure that all 4726 the marking data structures (local queues, stacks, fingers etc.) 4727 are re-initialised so that when do_marking_step() completes, 4728 the marking phase can immediately restart. 4729 4730 (3) When enough completed SATB buffers are available. The 4731 do_marking_step() method only tries to drain SATB buffers right 4732 at the beginning. So, if enough buffers are available, the 4733 marking step aborts and the SATB buffers are processed at 4734 the beginning of the next invocation. 4735 4736 (4) To yield. When we have to yield, we abort and do the actual 4737 yielding right at the end of do_marking_step(). This saves us from 4738 a lot of hassle as, by yielding, we might allow a Full GC. If this 4739 happens then objects will be compacted underneath our feet, the 4740 heap might shrink, etc. We save checking for this by just 4741 aborting and doing the yield right at the end. 4742 4743 From the above it follows that the do_marking_step() method should 4744 be called in a loop (or, otherwise, regularly) until it completes. 4745 4746 If a marking step completes without its has_aborted() flag being 4747 true, it means it has completed the current marking phase (and 4748 also all other marking tasks have done so and have all synced up). 4749 4750 A method called regular_clock_call() is invoked "regularly" (in 4751 sub ms intervals) throughout marking. It is this clock method that 4752 checks all the abort conditions which were mentioned above and 4753 decides when the task should abort. A work-based scheme is used to 4754 trigger this clock method: when the number of object words the 4755 marking phase has scanned or the number of references the marking 4756 phase has visited reach a given limit. Additional invocations of 4757 the clock method have been planted in a few other strategic places 4758 too. The initial reason for the clock method was to avoid calling 4759 vtime too regularly, as it is quite expensive. So, once it was in 4760 place, it was natural to piggy-back all the other conditions on it 4761 too and not constantly check them throughout the code.
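   A minimal sketch of such a driver loop (illustrative only; "task"
   and "target_ms" are placeholders, and the real drivers are the
   concurrent marking and remark worker tasks elsewhere in this file):

     do {
       // arguments: time target in ms, do_stealing, do_termination
       task->do_marking_step(target_ms, true, true);
       if (task->has_aborted()) {
         // yield, sync up, or handle overflow as needed, then retry
       }
     } while (task->has_aborted());

   A step that returns with has_aborted() false indicates that the
   current marking phase is complete.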
4762 4763 *****************************************************************************/ 4764 4765 void CMTask::do_marking_step(double time_target_ms, 4766 bool do_stealing, 4767 bool do_termination) { 4768 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 4769 assert(concurrent() == _cm->concurrent(), "they should be the same"); 4770 4771 assert(concurrent() || _cm->region_stack_empty(), 4772 "the region stack should have been cleared before remark"); 4773 assert(concurrent() || !_cm->has_aborted_regions(), 4774 "aborted regions should have been cleared before remark"); 4775 assert(_region_finger == NULL, 4776 "this should be non-null only when a region is being scanned"); 4777 4778 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 4779 assert(_task_queues != NULL, "invariant"); 4780 assert(_task_queue != NULL, "invariant"); 4781 assert(_task_queues->queue(_task_id) == _task_queue, "invariant"); 4782 4783 assert(!_claimed, 4784 "only one thread should claim this task at any one time"); 4785 4786 // OK, this doesn't safeguard against all possible scenarios, as it is 4787 // possible for two threads to set the _claimed flag at the same 4788 // time. But it is only for debugging purposes anyway and it will 4789 // catch most problems. 4790 _claimed = true; 4791 4792 _start_time_ms = os::elapsedVTime() * 1000.0; 4793 statsOnly( _interval_start_time_ms = _start_time_ms ); 4794 4795 double diff_prediction_ms = 4796 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 4797 _time_target_ms = time_target_ms - diff_prediction_ms; 4798 4799 // set up the variables that are used in the work-based scheme to 4800 // call the regular clock method 4801 _words_scanned = 0; 4802 _refs_reached = 0; 4803 recalculate_limits(); 4804 4805 // clear all flags 4806 clear_has_aborted(); 4807 _has_timed_out = false; 4808 _draining_satb_buffers = false; 4809 4810 ++_calls; 4811 4812 if (_cm->verbose_low()) { 4813 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 4814 "target = %1.2lfms >>>>>>>>>>", 4815 _task_id, _calls, _time_target_ms); 4816 } 4817 4818 // Set up the bitmap and oop closures. Anything that uses them is 4819 // eventually called from this method, so it is OK to allocate these 4820 // statically. 4821 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 4822 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 4823 set_cm_oop_closure(&cm_oop_closure); 4824 4825 if (_cm->has_overflown()) { 4826 // This can happen if the region stack or the mark stack overflows 4827 // during a GC pause and this task, after a yield point, 4828 // restarts. We have to abort as we need to get into the overflow 4829 // protocol which happens right at the end of this task. 4830 set_has_aborted(); 4831 } 4832 4833 // First drain any available SATB buffers. After this, we will not 4834 // look at SATB buffers before the next invocation of this method. 4835 // If enough completed SATB buffers are queued up, the regular clock 4836 // will abort this task so that it restarts. 4837 drain_satb_buffers(); 4838 // ...then partially drain the local queue and the global stack 4839 drain_local_queue(true); 4840 drain_global_stack(true); 4841 4842 // Then totally drain the region stack. We will not look at 4843 // it again before the next invocation of this method. Entries on 4844 // the region stack are only added during evacuation pauses, for 4845 // which we have to yield. When we do, we abort the task anyway so 4846 // it will look at the region stack again when it restarts.
4847 bitmap_closure.set_scanning_heap_region(false); 4848 drain_region_stack(&bitmap_closure); 4849 // ...then partially drain the local queue and the global stack 4850 drain_local_queue(true); 4851 drain_global_stack(true); 4852 4853 do { 4854 if (!has_aborted() && _curr_region != NULL) { 4855 // This means that we're already holding on to a region. 4856 assert(_finger != NULL, "if region is not NULL, then the finger " 4857 "should not be NULL either"); 4858 4859 // We might have restarted this task after an evacuation pause 4860 // which might have evacuated the region we're holding on to 4861 // underneath our feet. Let's read its limit again to make sure 4862 // that we do not iterate over a region of the heap that 4863 // contains garbage (update_region_limit() will also move 4864 // _finger to the start of the region if it is found empty). 4865 update_region_limit(); 4866 // We will start from _finger not from the start of the region, 4867 // as we might be restarting this task after aborting half-way 4868 // through scanning this region. In this case, _finger points to 4869 // the address where we last found a marked object. If this is a 4870 // fresh region, _finger points to start(). 4871 MemRegion mr = MemRegion(_finger, _region_limit); 4872 4873 if (_cm->verbose_low()) { 4874 gclog_or_tty->print_cr("[%d] we're scanning part " 4875 "["PTR_FORMAT", "PTR_FORMAT") " 4876 "of region "PTR_FORMAT, 4877 _task_id, _finger, _region_limit, _curr_region); 4878 } 4879 4880 // Let's iterate over the bitmap of the part of the 4881 // region that is left. 4882 bitmap_closure.set_scanning_heap_region(true); 4883 if (mr.is_empty() || 4884 _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4885 // We successfully completed iterating over the region. Now, 4886 // let's give up the region. 4887 giveup_current_region(); 4888 regular_clock_call(); 4889 } else { 4890 assert(has_aborted(), "currently the only way to do so"); 4891 // The only way to abort the bitmap iteration is to return 4892 // false from the do_bit() method. However, inside the 4893 // do_bit() method we move the _finger to point to the 4894 // object currently being looked at. So, if we bail out, we 4895 // have definitely set _finger to something non-null. 4896 assert(_finger != NULL, "invariant"); 4897 4898 // Region iteration was actually aborted. So now _finger 4899 // points to the address of the object we last scanned. If we 4900 // leave it there, when we restart this task, we will rescan 4901 // the object. It is easy to avoid this. We move the finger by 4902 // enough to point to the next possible object header (the 4903 // bitmap knows by how much we need to move it as it knows its 4904 // granularity). 4905 assert(_finger < _region_limit, "invariant"); 4906 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4907 // Check if bitmap iteration was aborted while scanning the last object 4908 if (new_finger >= _region_limit) { 4909 giveup_current_region(); 4910 } else { 4911 move_finger_to(new_finger); 4912 } 4913 } 4914 } 4915 // At this point we have either completed iterating over the 4916 // region we were holding on to, or we have aborted. 4917 4918 // We then partially drain the local queue and the global stack. 4919 // (Do we really need this?) 
    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
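
    // Note: try_stealing() below delegates to the task queue set,
    // which picks victim queues (using _hash_seed as this task's
    // private random seed) and tries to pop an entry from one of
    // them. Stealing keeps this task busy while other tasks still
    // have queued objects; only when every queue appears empty do we
    // fall through to the termination protocol further down.
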
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;
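
    // Note: offer_termination(this) blocks until either every task
    // has offered termination, in which case it returns true and we
    // take the "finished" branch below, or this task's
    // should_exit_termination() asks to leave the protocol early (for
    // example because new work has appeared), in which case it
    // returns false and we abort and restart.
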
    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task.
      // The caller will restart it and we can hopefully find more
      // things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
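
      // The two-barrier pattern above is the standard way to
      // re-initialise state shared by N workers:
      //
      //   enter_first_sync_barrier(id);   // nobody is still marking
      //   clear_region_fields();          // each task resets local
      //                                   // state; task 0 resets the
      //                                   // global data structures
      //   enter_second_sync_barrier(id);  // nobody restarts until all
      //                                   // resets have completed
      //
      // Without the second barrier, a fast task could start the new
      // marking round while a slow one was still clearing state.
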
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _aborted_region(MemRegion()),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX             "###"

#define G1PPRL_ADDR_BASE_FORMAT        " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT             " %-4s"
#define G1PPRL_TYPE_H_FORMAT           " %4s"
#define G1PPRL_BYTE_FORMAT             " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT           " %9s"
#define G1PPRL_DOUBLE_FORMAT           " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT         " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
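  // For illustration, the header printed below comes out roughly like
  // this (addresses and values made up):
  //
  //   ### PHASE Post-Marking @ 12.345
  //   ### HEAP committed: 0x...-0x... reserved: 0x...-0x... region-size: 1048576
  //   ###
  //   ### type         address-range      used prev-live next-live     gc-eff
  //   ###                               (bytes)   (bytes)   (bytes) (bytes/ms)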
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// Takes a reference to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field is <
// the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series from
// the _hum_* fields and updates those accordingly. It assumes that
// the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = "";
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
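
    // The _hum_* bookkeeping exists because a humongous object is
    // attributed entirely to its "starts humongous" region: without
    // it, for example, a 5 MB object in a heap with 1 MB regions
    // would print as 5 MB used in one region and 0 bytes in the four
    // "continues humongous" regions behind it. We instead stash the
    // totals here and let get_hum_bytes() hand out up to GrainBytes
    // per region as the series is visited in address order.
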
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}
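
// For reference, a minimal sketch of how this closure is meant to be
// driven (illustrative only; the actual call sites live elsewhere in
// the G1 code and may differ):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the header, doHeapRegion() prints one "###"
// line per region (returning false so that iteration continues), and
// the destructor prints the SUMMARY footer when cl goes out of scope.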