/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
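
// A hedged worked example of the sizing above (illustrative values, not
// taken from this file): on a 64-bit VM with HeapWordSize == 8 and
// _shifter == 0 (one bitmap bit per heap word), a 1 GB reservation is
// 128M heap words, so the backing store needs 128M bits >> LogBitsPerByte
// = 16 MB (plus one byte, rounded up to the allocation alignment).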

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
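
// Hedged usage sketch (not VM code): the pair of bitmap walkers above lets
// a caller consume marked ranges one at a time via getAndClearMarkedRegion():
//
//   HeapWord* cur = bm->startWord();
//   while (cur < bm->endWord()) {
//     MemRegion mr = bm->getAndClearMarkedRegion(cur, bm->endWord());
//     if (mr.is_empty()) break;   // no marked words left
//     process(mr);                // 'process' is a hypothetical consumer
//     cur = mr.end();
//   }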

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}


CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(MemRegion, _base);
  }
}

void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}
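
// A note on the CAS idiom used by the lock-free push/pop operations in
// this file: Atomic::cmpxchg(new_value, dest, expected) returns the value
// that was previously at *dest, so the exchange succeeded iff the return
// value equals 'expected'. A minimal sketch of the claim-a-slot retry
// loop (illustration only, not VM code):
//
//   jint index;
//   do {
//     index = _index;                                   // snapshot
//   } while (Atomic::cmpxchg(index + 1, &_index, index) != index);
//   // 'index' is now a slot this thread owns exclusively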

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, (size_t)1);
}
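
// Hedged illustration of the scaling above (computed by hand, not part of
// the VM): the integer division gives roughly one marking thread per four
// parallel GC threads, never fewer than one:
//   n_par_threads : 1..5 -> 1,  6..9 -> 2,  10..13 -> 3,  14..17 -> 4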

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overrides G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }
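
    // Hedged worked example of the G1MarkingOverheadPercent path above
    // (illustrative flag values, not defaults): with MaxGCPauseMillis=200,
    // GCPauseIntervalMillis=1000, G1MarkingOverheadPercent=10 on an
    // 8-processor machine:
    //   marking_overhead      = 10 / 100            = 0.10
    //   overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
    //   cpu_ratio             = 1 / 8               = 0.125
    //   marking_thread_num    = ceil(0.02 / 0.125)  = 1
    //   marking_task_overhead = 0.02 / 1 * 8        = 0.16
    //   sleep_factor          = (1 - 0.16) / 0.16   = 5.25
    // i.e. one marking thread that sleeps 5.25x as long as it runs.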

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         (int) _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we avoid
  // this problem.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur, next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}
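
// Hedged arithmetic for the chunked clearing above: chunkSize is M (1M)
// heap words, i.e. 8 MB of heap per chunk on a 64-bit VM, so clearing the
// bitmap range for a 1 GB heap offers to yield roughly 128 times.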

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or an evacuation
 * pause could occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT
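
// Hedged illustration of the diagnostic countdown above (behavior inferred
// from the code, values illustrative): init() arms the mechanism and each
// update() re-arms _force while _num_remaining is still positive, so with
// -XX:G1ConcMarkForceOverflow=N roughly the first N remark attempts are
// forced to overflow and restart; once the countdown reaches zero,
// should_force() stays false and marking completes normally.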

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr)) {
    _nextMarkBitMap->parMark(addr);
  }
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects) {
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;
  }

  HeapWord* finger = _finger;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);
  }

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low()) {
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());
    }

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low()) {
        gclog_or_tty->print_cr("[global] region stack has overflown.");
      }
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects) {
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;
      }

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low()) {
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
          }
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};
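
// Hedged example of the throttling in CMConcurrentMarkingTask::work() above
// (values carried over from the constructor's worked overhead example, not
// defaults): if a marking step consumed elapsed_vtime_sec = 0.05 s of CPU
// time and sleep_factor() is 5.25, the worker sleeps for
// 0.05 * 5.25 * 1000 ~= 262 ms before its next marking step.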

// Calculates the number of active workers for a concurrent
// phase.
size_t ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    size_t n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* allow dirty */ true,
                       /* silent      */ false,
                       /* option      */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm), _final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) break;
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0) {
      _start_vtime_sec = os::elapsedVTime();
    }

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
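    // Hedged worked example (assuming the usual 512-byte cards,
    // card_shift == 9): with the heap bottom at 0x20000000,
    // _bottom_card_num is 0x20000000 >> 9 = 0x100000; an object at
    // 0x20001200 has card num 0x100009, i.e. card table index 9.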
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1) {
      mark_card_num_range(start_card_num, last_card_num);
    }
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};


void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"), _g1h(g1h),
      _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
      _n_workers(0)
  {
    // Use the value already set as the number of active threads
    // in the call to run_task(). Needed for the allocation of
    // _live_bytes and _used_bytes.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
              "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            (int) _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};
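
// Hedged note on the counting pattern above: each worker writes only its
// own _live_bytes[i] / _used_bytes[i] slot, so work() needs no
// synchronization; live_bytes() and used_bytes() then reduce the
// per-worker totals single-threadedly after run_task() returns.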

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  OldRegionSet* _old_proxy_set;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_proxy_set(old_proxy_set),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *hr) {
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    hr->reset_gc_time_stamp();
    if (!hr->continuesHumongous()) {
      double start = os::elapsedTime();
      _regions_claimed++;
      hr->note_end_of_marking();
      _max_live_bytes += hr->max_live_bytes();
      _g1->free_region_if_empty(hr,
                                &_freed_bytes,
                                _local_cleanup_list,
                                _old_proxy_set,
                                _humongous_proxy_set,
                                _hrrs_cleanup_task,
                                true /* par */);
      double region_time = (os::elapsedTime() - start);
      _claimed_region_time += region_time;
      if (region_time > _max_region_time) {
        _max_region_time = region_time;
      }
    }
    return false;
  }

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(int i) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
                                           &old_proxy_set,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            _g1h->workers()->active_workers(),
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.
      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end - start) * 1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec() * 1000.0,
                          g1_note_end.max_region_time_sec() * 1000.0);
    }
  }

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;

public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm) { }

  void work(int i) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }
};
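// In outline, ConcurrentMark::cleanup() below runs inside a
// stop-the-world pause and (1) re-runs liveness counting with
// G1ParFinalCountTask, (2) swaps the mark bitmaps and notes the end of
// marking in every region with G1ParNoteEndTask, freeing regions that
// turned out to be completely empty, and (3) optionally scrubs the
// remembered sets with G1ParScrubRemSetTask. Regions freed here are
// appended to _cleanup_list and handed over to completeCleanup(), which
// runs concurrently after the pause.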
void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  HRSPhaseSetter x(HRSPhaseCleanup);
  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  size_t n_workers;

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    g1h->set_par_threads();
    n_workers = g1h->n_par_threads();
    assert(g1h->n_par_threads() == (int) n_workers,
           "Should not have been reset");
    g1h->workers()->run_task(&g1_par_count_task);
    // Done with the parallel phase so reset to 0.
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    n_workers = 1;
    g1_par_count_task.work(0);
  }

  size_t known_garbage_bytes =
    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  g1p->set_known_garbage_bytes(known_garbage_bytes);

  size_t start_used_bytes = g1h->used();
  _at_least_one_mark_complete = true;
  g1h->set_marking_complete();

  ergo_verbose4(ErgoConcCycles,
                "finish cleanup",
                ergo_format_byte("occupancy")
                ergo_format_byte("capacity")
                ergo_format_byte_perc("known garbage"),
                start_used_bytes, g1h->capacity(),
                known_garbage_bytes,
                ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("Cleanup:");
    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
                           this_final_counting_time * 1000.0);
  }
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }
  double note_end_end = os::elapsedTime();
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
                           (note_end_end - note_end_start) * 1000.0);
  }

  // The remembered sets, if they are to be scrubbed at all, have to be
  // scrubbed before the record_concurrent_mark_cleanup_end() call
  // below, since scrubbing affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // G1CollectedHeap::heap()->print();
  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  //                        G1CollectedHeap::heap()->get_gc_time_stamp());

  if (PrintGC || PrintGCDetails) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  g1h->verify_region_sets_optional();
}
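// completeCleanup() below runs on the concurrent mark thread, outside
// the pause that executed cleanup(). It drains the _cleanup_list that
// cleanup() populated, clears each region's data structures with
// par_clear(), and feeds the regions back to the allocator through the
// secondary free list.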
void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has "SIZE_FORMAT" entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
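    // The cadence of those appends is set by
    // G1SecondaryFreeListAppendLength: if, say, the flag is 10, the
    // SecondaryFreeList_lock is taken and waiters are notified roughly
    // once per 10 reclaimed regions rather than once per region.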
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending "SIZE_FORMAT" entries to the "
                               "secondary_free_list, clean list still has "
                               SIZE_FORMAT" entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Support closures for reference processing in G1

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

class G1CMKeepAliveClosure: public OopClosure {
  G1CollectedHeap* _g1;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitMap;

public:
  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                       CMBitMap* bitMap) :
    _g1(g1), _cm(cm),
    _bitMap(bitMap) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    HeapWord* addr = (HeapWord*)obj;

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("\t[0] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             p, (void*) obj);
    }

    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
      _bitMap->mark(addr);
      _cm->mark_stack_push(obj);
    }
  }
};

class G1CMDrainMarkingStackClosure: public VoidClosure {
  CMMarkStack*          _markStack;
  CMBitMap*             _bitMap;
  G1CMKeepAliveClosure* _oopClosure;

public:
  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
                               G1CMKeepAliveClosure* oopClosure) :
    _markStack(markStack),
    _bitMap(bitMap),
    _oopClosure(oopClosure) { }

  void do_void() {
    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  }
};

// 'Keep Alive' closure used by parallel reference processing.
// An instance of this closure is used in the parallel reference processing
// code rather than an instance of G1CMKeepAliveClosure. We could have used
// the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
// placed on to discovered ref lists only once, so we can mark and push with
// no need to check whether the object has already been marked. Using the
// G1CMKeepAliveClosure would mean, however, having all the worker threads
// operating on the global mark stack. This means that an individual
// worker would be doing lock-free pushes while it processes its own
// discovered ref list followed by a drain call. If the discovered ref lists
// are unbalanced then this could cause interference with the other
// workers. Using a CMTask (and its embedded local data structures)
// avoids that potential interference.
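// Put differently: the trade-off is between having every
// reference-processing worker push directly onto the shared global mark
// stack (simple, but unbalanced discovered lists make the workers
// contend on that one stack) and giving each worker its own CMTask,
// whose local queue only spills to the global stack when it fills up.
// The closure below takes the second option.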
class G1CMParKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask*         _task;
  int             _ref_counter_limit;
  int             _ref_counter;

public:
  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->task_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call do_marking_step() to process
        // these entries. We call the routine in a loop, which we'll exit
        // if there's nothing more to do (i.e. we're done with the entries
        // that we've pushed as a result of the deal_with_reference calls
        // above) or we overflow.
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment
        // at the beginning of CMTask::do_marking_step() for those
        // conditions - one of which is reaching the specified time target.)
        // It is only when CMTask::do_marking_step() returns without setting
        // the has_aborted() flag that the marking has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_stealing    */,
                                 false /* do_termination */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
      }
    }
  }
};

class G1CMParDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask*         _task;

public:
  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task) { }

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step",
                               _task->task_id());
      }

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks. The routine is called in a loop, which
      // we'll exit if there's nothing more to do (i.e. we've completely
      // drained the entries that were pushed as a result of applying the
      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered
      // ref lists above) or we overflow the global marking stack.
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment
      // at the beginning of CMTask::do_marking_step() for those
      // conditions - one of which is reaching the specified time target.)
      // It is only when CMTask::do_marking_step() returns without setting
      // the has_aborted() flag that the marking has completed.
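      // The 1e9 ms time target passed below effectively disables the
      // time-based abort: during this drain we want do_marking_step()
      // to run until the stacks are empty (or the global stack
      // overflows), not to stop on a clock tick the way it does during
      // concurrent marking.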
      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_stealing    */,
                             true /* do_termination */);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) { }

  virtual void work(int i) {
    CMTask* marking_task = _cm->task(i);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);

    _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the phase for each task execution so that
  // the termination protocol of CMTask::do_marking_step works.
  _cm->set_phase(_active_workers, false /* concurrent */);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(int i) {
    _enq_task.work(i);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    bool verbose = PrintGC && PrintGCDetails;
    if (verbose) {
      gclog_or_tty->put(' ');
    }
    TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Process weak references.
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
    G1CMDrainMarkingStackClosure
      g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);

    // We use the work gang from the G1CollectedHeap and we utilize all
    // the worker threads.
    int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
    active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);

    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);

    if (rp->processing_is_mt()) {
      // Set the degree of MT here. If the discovery was done MT, there
      // may have been a different number of threads doing the discovery,
      // and a different number of discovered lists may have Ref objects.
      // That is OK as long as the Reference lists are balanced (see
      // balance_all_queues() and balance_queues()).
      rp->set_active_mt_degree(active_workers);

      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        &par_task_executor);

      // The work routines of the parallel keep_alive and drain_marking_stack
      // closures will set the has_overflown flag if we overflow the global
      // marking stack.
    } else {
      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        NULL);
    }

    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");
    if (_markStack.overflow()) {
      // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    if (rp->processing_is_mt()) {
      assert(rp->num_q() == active_workers, "why not");
      rp->enqueue_discovered_references(&par_task_executor);
    } else {
      rp->enqueue_discovered_references();
    }

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  // Now clean up stale oops in the StringTable.
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in the symbol table.
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  void work(int worker_i) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
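    // (The gang may start more workers than there are active marking
    // tasks; see the comments around the run_task() calls in
    // checkpointRootsFinalWork(). Workers whose ids fall beyond
    // active_tasks() simply return without touching any task.)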
    if ((size_t)worker_i < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_i);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing    */,
                              true /* do_termination */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    int active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers);
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption  vo,
                           bool          all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = false;
      bool marked = false;

      switch (_vo) {
        case VerifyOption_G1UsePrevMarking:
          over_tams = hr->obj_allocated_since_prev_marking(obj);
          marked = _g1h->isMarkedPrev(obj);
          break;
        case VerifyOption_G1UseNextMarking:
          over_tams = hr->obj_allocated_since_next_marking(obj);
          marked = _g1h->isMarkedNext(obj);
          break;
        case VerifyOption_G1UseMarkWord:
          marked = obj->is_gc_marked();
          break;
        default:
          ShouldNotReachHere();
      }

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;
  HeapRegion*      _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all,
                              HeapRegion*   hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = false;
    bool marked = false;

    switch (_vo) {
      case VerifyOption_G1UsePrevMarking:
        over_tams = _hr->obj_allocated_since_prev_marking(o);
        marked = _g1h->isMarkedPrev(o);
        break;
      case VerifyOption_G1UseNextMarking:
        over_tams = _hr->obj_allocated_since_next_marking(o);
        marked = _g1h->isMarkedNext(o);
        break;
      case VerifyOption_G1UseMarkWord:
        marked = o->is_gc_marked();
        break;
      default:
        ShouldNotReachHere();
    }
    bool print_it = _all || over_tams || marked;

    if (print_it) {
" M" : ""); 2491 PrintReachableOopClosure oopCl(_out, _vo, _all); 2492 o->oop_iterate(&oopCl); 2493 } 2494 } 2495 }; 2496 2497 class PrintReachableRegionClosure : public HeapRegionClosure { 2498 private: 2499 outputStream* _out; 2500 VerifyOption _vo; 2501 bool _all; 2502 2503 public: 2504 bool doHeapRegion(HeapRegion* hr) { 2505 HeapWord* b = hr->bottom(); 2506 HeapWord* e = hr->end(); 2507 HeapWord* t = hr->top(); 2508 HeapWord* p = NULL; 2509 2510 switch (_vo) { 2511 case VerifyOption_G1UsePrevMarking: 2512 p = hr->prev_top_at_mark_start(); 2513 break; 2514 case VerifyOption_G1UseNextMarking: 2515 p = hr->next_top_at_mark_start(); 2516 break; 2517 case VerifyOption_G1UseMarkWord: 2518 // When we are verifying marking using the mark word 2519 // TAMS has no relevance. 2520 assert(p == NULL, "post-condition"); 2521 break; 2522 default: 2523 ShouldNotReachHere(); 2524 } 2525 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2526 "TAMS: "PTR_FORMAT, b, e, t, p); 2527 _out->cr(); 2528 2529 HeapWord* from = b; 2530 HeapWord* to = t; 2531 2532 if (to > from) { 2533 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2534 _out->cr(); 2535 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2536 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2537 _out->cr(); 2538 } 2539 2540 return false; 2541 } 2542 2543 PrintReachableRegionClosure(outputStream* out, 2544 VerifyOption vo, 2545 bool all) : 2546 _out(out), _vo(vo), _all(all) { } 2547 }; 2548 2549 static const char* verify_option_to_tams(VerifyOption vo) { 2550 switch (vo) { 2551 case VerifyOption_G1UsePrevMarking: 2552 return "PTAMS"; 2553 case VerifyOption_G1UseNextMarking: 2554 return "NTAMS"; 2555 default: 2556 return "NONE"; 2557 } 2558 } 2559 2560 void ConcurrentMark::print_reachable(const char* str, 2561 VerifyOption vo, 2562 bool all) { 2563 gclog_or_tty->cr(); 2564 gclog_or_tty->print_cr("== Doing heap dump... "); 2565 2566 if (G1PrintReachableBaseFile == NULL) { 2567 gclog_or_tty->print_cr(" #### error: no base file defined"); 2568 return; 2569 } 2570 2571 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2572 (JVM_MAXPATHLEN - 1)) { 2573 gclog_or_tty->print_cr(" #### error: file name too long"); 2574 return; 2575 } 2576 2577 char file_name[JVM_MAXPATHLEN]; 2578 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2579 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2580 2581 fileStream fout(file_name); 2582 if (!fout.is_open()) { 2583 gclog_or_tty->print_cr(" #### error: could not open file"); 2584 return; 2585 } 2586 2587 outputStream* out = &fout; 2588 out->print_cr("-- USING %s", verify_option_to_tams(vo)); 2589 out->cr(); 2590 2591 out->print_cr("--- ITERATING OVER REGIONS"); 2592 out->cr(); 2593 PrintReachableRegionClosure rcl(out, vo, all); 2594 _g1h->heap_region_iterate(&rcl); 2595 out->cr(); 2596 2597 gclog_or_tty->print_cr(" done"); 2598 gclog_or_tty->flush(); 2599 } 2600 2601 #endif // PRODUCT 2602 2603 // This note is for drainAllSATBBuffers and the code in between. 2604 // In the future we could reuse a task to do this work during an 2605 // evacuation pause (since now tasks are not active and can be claimed 2606 // during an evacuation pause). This was a late change to the code and 2607 // is currently not being taken advantage of. 
class CMGlobalObjectClosure : public ObjectClosure {
private:
  ConcurrentMark* _cm;

public:
  void do_object(oop obj) {
    _cm->deal_with_reference(obj);
  }

  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
};

void ConcurrentMark::deal_with_reference(oop obj) {
  if (verbose_high()) {
    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
                           (void*) obj);
  }

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "null check is implicit");
    if (!_nextMarkBitMap->isMarked(objAddr)) {
      // Only get the containing region if the object is not marked on the
      // bitmap (otherwise, it's a waste of time since we won't do
      // anything with it).
      HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
      if (!hr->obj_allocated_since_next_marking(obj)) {
        if (verbose_high()) {
          gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
                                 "marked", (void*) obj);
        }

        // we need to mark it first
        if (_nextMarkBitMap->parMark(objAddr)) {
          // No OrderAccess::store_load() is needed. It is implicit in the
          // CAS done in parMark(objAddr) above.
          HeapWord* finger = _finger;
          if (objAddr < finger) {
            if (verbose_high()) {
              gclog_or_tty->print_cr("[global] below the global finger "
                                     "("PTR_FORMAT"), pushing it", finger);
            }
            if (!mark_stack_push(obj)) {
              if (verbose_low()) {
                gclog_or_tty->print_cr("[global] global stack overflow during "
                                       "deal_with_reference");
              }
            }
          }
        }
      }
    }
  }
}

void ConcurrentMark::drainAllSATBBuffers() {
  CMGlobalObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_closure(&oc);

  while (satb_mq_set.apply_closure_to_completed_buffer()) {
    if (verbose_medium()) {
      gclog_or_tty->print_cr("[global] processed an SATB buffer");
    }
  }

  // no need to check whether we should do this, as this is only
  // called during an evacuation pause
  satb_mq_set.iterate_closure_all_threads();

  satb_mq_set.set_closure(NULL);
  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
}

void ConcurrentMark::markPrev(oop p) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
}

void ConcurrentMark::clear(oop p) {
  assert(p != NULL && p->is_oop(), "expected an oop");
  HeapWord* addr = (HeapWord*)p;
  assert(addr >= _nextMarkBitMap->startWord() &&
         addr < _nextMarkBitMap->endWord(), "in a region");

  _nextMarkBitMap->clear(addr);
}

void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  _nextMarkBitMap->clearRange(mr);
}

HeapRegion*
ConcurrentMark::claim_region(int task_num) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
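  // The loop below implements a lock-free claim: read the global finger,
  // compute the end of the region it points into, and try to CAS the
  // finger from the value we read to that end. Success means this task
  // has claimed the region; failure means another task moved the finger
  // first, so we re-read it and retry.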
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // Note on how this code handles humongous regions. In the
    // normal case the finger will reach the start of a "starts
    // humongous" (SH) region. Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord*   bottom      = curr_region->bottom();
    HeapWord*   end         = curr_region->end();
    HeapWord*   limit       = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // Notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further.
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        }
        // We return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               task_num, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  bool result = false;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        the_task->set_aborted_region(MemRegion());
        result = true;
      }
    }
  }
  return result;
}

bool ConcurrentMark::has_aborted_regions() {
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return true;
    }
  }
  return false;
}

void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_markStack.size() > 0 && verbose_low()) {
    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
                           "size = %d", _markStack.size());
  }
  // we first iterate over the contents of the mark stack...
  _markStack.oops_do(cl);

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue((int)i);

    if (queue->size() > 0 && verbose_low()) {
      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
                             "size = %d", i, queue->size());
    }

    // ...then over the contents of all the task queues.
    queue->oops_do(cl);
  }

  // Invalidate any entries in the region stack that
  // point into the collection set.
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

  // Invalidate any aborted regions, recorded in the individual CM
  // tasks, that point into the collection set.
  if (invalidate_aborted_regions_in_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }
}

void ConcurrentMark::clear_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  _regionStack.setEmpty();
  _regionStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
    // Clear any partial regions from the CMTasks
    _tasks[i]->clear_aborted_region();
  }
}

void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// Closures used by ConcurrentMark::complete_marking_in_collection_set().

class CSetMarkOopClosure: public OopClosure {
  friend class CSetMarkBitMapClosure;

  G1CollectedHeap* _g1h;
  CMBitMap*        _bm;
  ConcurrentMark*  _cm;
  oop*             _ms;
  jint*            _array_ind_stack;
  int              _ms_size;
  int              _ms_ind;
  int              _array_increment;
  int              _worker_i;

  bool push(oop obj, int arr_ind = 0) {
    if (_ms_ind == _ms_size) {
      gclog_or_tty->print_cr("Mark stack is full.");
      return false;
    }
    _ms[_ms_ind] = obj;
    if (obj->is_objArray()) {
      _array_ind_stack[_ms_ind] = arr_ind;
    }
    _ms_ind++;
    return true;
  }

  oop pop() {
    if (_ms_ind == 0) {
      return NULL;
    } else {
      _ms_ind--;
      return _ms[_ms_ind];
    }
  }

  template <class T> bool drain() {
    while (_ms_ind > 0) {
      oop obj = pop();
      assert(obj != NULL, "Since index was non-zero.");
      if (obj->is_objArray()) {
        jint arr_ind = _array_ind_stack[_ms_ind];
        objArrayOop aobj = objArrayOop(obj);
        jint len = aobj->length();
        jint next_arr_ind = arr_ind + _array_increment;
        if (next_arr_ind < len) {
          push(obj, next_arr_ind);
        }
        // Now process this portion of this one.
        int lim = MIN2(next_arr_ind, len);
        for (int j = arr_ind; j < lim; j++) {
          do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
        }
      } else {
        obj->oop_iterate(this);
      }
      if (abort()) return false;
    }
    return true;
  }

public:
  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
    _g1h(G1CollectedHeap::heap()),
    _bm(cm->nextMarkBitMap()),
    _cm(cm),
    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
    _ms_size(ms_size), _ms_ind(0),
    _array_increment(MAX2(ms_size/8, 16)),
    _worker_i(worker_i) { }

  ~CSetMarkOopClosure() {
    FREE_C_HEAP_ARRAY(oop, _ms);
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (oopDesc::is_null(heap_oop)) return;
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked. So follow the forwarding pointer. Note that
      // this does the right thing for self-forwarding pointers in the
      // evacuation failure case.
      obj = obj->forwardee();
    }
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (hr != NULL) {
      if (hr->in_collection_set()) {
        if (_g1h->is_obj_ill(obj)) {
          if (_bm->parMark((HeapWord*)obj)) {
            if (!push(obj)) {
              gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure "
                                     "because push failed.");
              set_abort();
            }
          }
        }
      } else {
        // Outside the collection set; we need to gray it
        _cm->deal_with_reference(obj);
      }
    }
  }
};

class CSetMarkBitMapClosure: public BitMapClosure {
  G1CollectedHeap*   _g1h;
  CMBitMap*          _bitMap;
  ConcurrentMark*    _cm;
  CSetMarkOopClosure _oop_cl;
  int                _worker_i;

public:
  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
    _g1h(G1CollectedHeap::heap()),
    _bitMap(cm->nextMarkBitMap()),
    _cm(cm),
    _oop_cl(cm, ms_size, worker_i),
    _worker_i(worker_i) { }

  bool do_bit(size_t offset) {
    // convert offset into a HeapWord*
    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
    assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
           "address out of range");
    assert(_bitMap->isMarked(addr), "tautology");
    oop obj = oop(addr);
    if (!obj->is_forwarded()) {
      if (!_oop_cl.push(obj)) return false;
      if (UseCompressedOops) {
        if (!_oop_cl.drain<narrowOop>()) return false;
      } else {
        if (!_oop_cl.drain<oop>()) return false;
      }
    }
    // Otherwise...
    return true;
  }
};

class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
  CMBitMap*             _bm;
  CSetMarkBitMapClosure _bit_cl;
  int                   _worker_i;

  enum SomePrivateConstants {
    MSSize = 1000
  };

public:
  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
    _bm(cm->nextMarkBitMap()),
    _bit_cl(cm, MSSize, worker_i),
    _worker_i(worker_i) { }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
      // The current worker has successfully claimed the region.
      if (!hr->evacuation_failed()) {
        MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
        if (!mr.is_empty()) {
          bool done = false;
          while (!done) {
            done = _bm->iterate(&_bit_cl, mr);
          }
        }
      }
    }
    return false;
  }
};

class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
  jint _claim_value;

public:
  SetClaimValuesInCSetHRClosure(jint claim_value) :
    _claim_value(claim_value) { }

  bool doHeapRegion(HeapRegion* hr) {
    hr->set_claim_value(_claim_value);
    return false;
  }
};

class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;

public:
  G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
                              ConcurrentMark* cm) :
    AbstractGangTask("Complete Mark in CSet"),
    _g1h(g1h), _cm(cm) { }

  void work(int worker_i) {
    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
    _g1h->collection_set_iterate_from(hr, &cmplt);
  }
};

void ConcurrentMark::complete_marking_in_collection_set() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  if (!g1h->mark_in_progress()) {
    g1h->g1_policy()->record_mark_closure_time(0.0);
    return;
  }

  double start = os::elapsedTime();
  G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);

  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue),
         "sanity");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    int n_workers = g1h->workers()->active_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&complete_mark_task);
    g1h->set_par_threads(0);
  } else {
    complete_mark_task.work(0);
  }

  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue),
         "sanity");

  // Now reset the claim values in the regions in the collection set.
  SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
  g1h->collection_set_iterate(&set_cv_cl);

  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue),
         "sanity");

  double end_time = os::elapsedTime();
  double elapsed_time_ms = (end_time - start) * 1000.0;
  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
}

// The next two methods deal with the following optimisation. Some
// objects are gray by being marked and located above the finger. If
// they are copied, during an evacuation pause, below the finger then
// they need to be pushed on the stack. The observation is that, if
// there are no regions in the collection set located above the
// finger, then the above cannot happen, hence we do not need to
// explicitly gray any objects when copying them to below the
// finger. The global stack will be scanned to ensure that, if it
// points to objects being copied, it will update their
// location. There is a tricky situation with the gray objects on the
// region stack that are being copied, however. See the comment in
// newCSet().
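// A concrete instance of the problem, roughly: suppose the global finger
// is inside region R5 and an object in R7 is already marked (gray, above
// the finger). If R7 is in the collection set and the object is
// evacuated below the finger during a pause, the marker will never walk
// that part of the bitmap again, so unless the copy is pushed on a stack
// its fields would never be scanned. newCSet() and registerCSetRegion()
// below decide whether any collection set region lies above the lowest
// of the global and per-task fingers, and set _should_gray_objects
// accordingly.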
void ConcurrentMark::newCSet() {
  if (!concurrent_marking_in_progress()) {
    // nothing to do if marking is not in progress
    return;
  }

  // find what the lowest finger is among the global and local fingers
  _min_finger = _finger;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _min_finger) {
      _min_finger = task_finger;
    }
  }

  _should_gray_objects = false;

  // This fixes a very subtle and frustrating bug. It might be the case
  // that, during an evacuation pause, heap regions that contain
  // objects that are gray (by being in regions contained in the
  // region stack) are included in the collection set. Since such gray
  // objects will be moved, and because it's not easy to redirect
  // region stack entries to point to a new location (because objects
  // in one region might be scattered to multiple regions after they
  // are copied), one option is to ensure that all marked objects
  // copied during a pause are pushed on the stack. Notice, however,
  // that this problem can only happen when the region stack is not
  // empty during an evacuation pause. So, we make the fix a bit less
  // conservative and ensure that regions are pushed on the stack,
  // irrespective of whether all collection set regions are below the
  // finger, if the region stack is not empty. This is expected to be
  // a rare case, so I don't think it's necessary to be smarter about it.
  if (!region_stack_empty() || has_aborted_regions()) {
    _should_gray_objects = true;
  }
}

void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  if (!concurrent_marking_in_progress()) return;

  HeapWord* region_end = hr->end();
  if (region_end > _min_finger) {
    _should_gray_objects = true;
  }
}

// Resets the region fields of active CMTasks whose values point
// into the collection set.
void ConcurrentMark::reset_active_task_region_fields_in_cset() {
  assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
  assert(parallel_marking_threads() <= _max_task_num, "sanity");

  for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL) {
      assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
      HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
      if (finger_region->in_collection_set()) {
        // The task's current region is in the collection set.
        // This region will be evacuated in the current GC and
        // the region fields in the task will be stale.
        task->giveup_current_region();
      }
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Empty mark stack
  clear_marking_state();
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ?
                          _total_counting_time * 1000.0 / (double)_cleanup_times.num() :
                          0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ?
                            _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() :
                            0.0));
  }
  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
                         "(%8.2f s marking, %8.2f s counting).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum(),
                         cmThread()->vtime_count_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

// Closures
// XXX: there seems to be a lot of code duplication here;
// should refactor and consolidate the shared code.

// We take a break if someone is trying to stop the world.
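// Only worker 0 records the pause with the collector policy (via
// record_concurrent_pause() and record_concurrent_pause_end()), so the
// yield is accounted for once rather than once per marking worker.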
bool ConcurrentMark::do_yield_check(int worker_i) {
  if (should_yield()) {
    if (worker_i == 0) {
      _g1h->g1_policy()->record_concurrent_pause();
    }
    cmThread()->yield();
    if (worker_i == 0) {
      _g1h->g1_policy()->record_concurrent_pause_end();
    }
    return true;
  } else {
    return false;
  }
}

bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}

bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}

bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return containing_card_is_marked(start) &&
         containing_card_is_marked(last);
}

#ifndef PRODUCT
// for debugging purposes
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (int i = 0; i < (int) _max_task_num; ++i) {
    gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
#endif

void CMTask::scan_object(oop obj) {
  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");

  if (_cm->verbose_high()) {
    gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
                           _task_id, (void*) obj);
  }

  size_t obj_size = obj->size();
  _words_scanned += obj_size;

  obj->oop_iterate(_cm_oop_closure);
  statsOnly( ++_objs_scanned );
  check_limits();
}

// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap*       _nextMarkBitMap;
  ConcurrentMark* _cm;
  CMTask*         _task;
  // true if we're scanning a heap region claimed by the task (so that
  // we move the finger along), false if we're not, i.e. currently when
  // scanning a heap region popped from the region stack (so that we
  // do not move the task finger along; it'd be a mistake if we did so).
  bool            _scanning_heap_region;

public:
  CMBitMapClosure(CMTask* task,
                  ConcurrentMark* cm,
                  CMBitMap* nextMarkBitMap)
    : _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }

  void set_scanning_heap_region(bool scanning_heap_region) {
    _scanning_heap_region = scanning_heap_region;
  }

  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert(addr < _cm->finger(), "invariant");

    if (_scanning_heap_region) {
      statsOnly( _task->increase_objs_found_on_bitmap() );
      assert(addr >= _task->finger(), "invariant");
      // We move that task's local finger along.
      _task->move_finger_to(addr);
    } else {
      // We move the task's region finger along.
      _task->move_region_finger_to(addr);
    }

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out continues humongous regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
                           _task_id, hr);
  }

  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  }
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = NULL;
  _finger = NULL;
  _region_limit = NULL;

  _region_finger = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] resetting", _task_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();
  assert(_aborted_region.is_empty(), "should have been cleared");

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes = 0;
  _local_pops = 0;
  _local_max_size = 0;
  _objs_scanned = 0;
  _global_pushes = 0;
  _global_pops = 0;
  _global_max_size = 0;
  _global_transfers_to = 0;
  _global_transfers_from = 0;
  _region_stack_pops = 0;
  _regions_claimed = 0;
  _objs_found_on_bitmap = 0;
  _satb_buffers_processed = 0;
  _steal_attempts = 0;
  _steals = 0;
  _aborted = 0;
  _aborted_overflow = 0;
  _aborted_cm_aborted = 0;
  _aborted_yield = 0;
  _aborted_timed_out = 0;
  _aborted_satb = 0;
  _aborted_termination = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following:

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we record the cause of
  // this clock call and update the clock interval history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    }
    // We do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so.
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
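  // Since recalculate_limits() set the limits a full period beyond
  // the current counts, pulling them back by three quarters of a
  // period leaves only a quarter of the usual work before the next
  // regular_clock_call().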

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
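  // A partial drain leaves up to a third of the queue's capacity
  // (capped at GCDrainStackTargetSize entries) so that other tasks
  // still have something to steal; a total drain goes down to empty.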
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
                             _task_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue set has several assumptions on whether to call the
// par or non-par versions of the methods. This is why some of the
// code is replicated. We should really get rid of the single-threaded
// version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
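  // regular_clock_call() is invoked after each buffer, so a task that
  // spends a long time in here still checks its abort conditions
  // (including its time quota) frequently enough.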
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void CMTask::drain_region_stack(BitMapClosure* bc) {
  if (has_aborted()) return;

  assert(_region_finger == NULL,
         "it should be NULL when we're not scanning a region");

  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                             _task_id, _cm->region_stack_size());
    }

    MemRegion mr;

    if (!_aborted_region.is_empty()) {
      mr = _aborted_region;
      _aborted_region = MemRegion();

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] scanning aborted region "
                               "[ " PTR_FORMAT ", " PTR_FORMAT " )",
                               _task_id, mr.start(), mr.end());
      }
    } else {
      mr = _cm->region_stack_pop_lock_free();
      // it returns MemRegion() if the pop fails
      statsOnly( if (mr.start() != NULL) ++_region_stack_pops );
    }

    while (mr.start() != NULL) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());
      }

      assert(mr.end() <= _cm->finger(),
             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
      if (_nextMarkBitMap->iterate(bc, mr)) {
        assert(!has_aborted(),
               "cannot abort the task without aborting the bitmap iteration");

        // We finished iterating over the region without aborting.
        regular_clock_call();
        if (has_aborted()) {
          mr = MemRegion();
        } else {
          mr = _cm->region_stack_pop_lock_free();
          // it returns MemRegion() if the pop fails
          statsOnly( if (mr.start() != NULL) ++_region_stack_pops );
        }
      } else {
        assert(has_aborted(), "currently the only way to do so");

        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _region_finger to point to the
        // object currently being looked at.
        // So, if we bail out, we have definitely set _region_finger
        // to something non-null.
        assert(_region_finger != NULL, "invariant");

        // Make sure that any previously aborted region has been
        // cleared.
        assert(_aborted_region.is_empty(), "aborted region not cleared");

        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        MemRegion newRegion =
          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());

        if (!newRegion.is_empty()) {
          if (_cm->verbose_low()) {
            gclog_or_tty->print_cr("[%d] recording unscanned region "
                                   "[" PTR_FORMAT ", " PTR_FORMAT ") in CMTask",
                                   _task_id,
                                   newRegion.start(), newRegion.end());
          }
          // Now record the part of the region we didn't scan to
          // make sure this task scans it later.
          _aborted_region = newRegion;
        }
        // break from while
        mr = MemRegion();
      }
      _region_finger = NULL;
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
    }
  }
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
                         _regions_claimed, _region_stack_pops);
  gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

 The do_marking_step(time_target_ms) method is the building block
 of the parallel marking framework. It can be called in parallel
 with other invocations of do_marking_step() on different tasks
 (but only one per task, obviously) and concurrently with the
 mutator threads, or during remark, hence it eliminates the need
 for two versions of the code. When called during remark, it will
 pick up from where the task left off during the concurrent marking
 phase. Interestingly, tasks are also claimable during evacuation
 pauses, since do_marking_step() ensures that it aborts before
 it needs to yield.

 The data structures that it uses to do marking work are the
 following:

 (1) Marking Bitmap. If there are gray objects that appear only
 on the bitmap (this happens either when dealing with an overflow
 or when the initial marking phase has simply marked the roots
 and didn't push them on the stack), then tasks claim heap
 regions whose bitmap they then scan to find gray objects. A
 global finger indicates where the end of the last claimed region
 is. A local finger indicates how far into the region a task has
 scanned. The two fingers are used to determine how to gray an
 object (i.e. whether simply marking it is OK, as it will be
 visited by a task in the future, or whether it also needs to be
 pushed on a stack).

 (2) Local Queue. The task's local queue, which it can access
 reasonably efficiently. Other tasks can steal from it when they
 run out of work. Throughout the marking phase, a task attempts
 to keep its local queue short but not totally empty, so that
 entries are available for stealing by other tasks. Only when
 there is no more work will a task totally drain its local queue.

 (3) Global Mark Stack. This handles local queue overflow. During
 marking, only sets of entries are moved between it and the local
 queues, as access to it requires a mutex and more fine-grained
 interaction with it might cause contention. If it overflows,
 then the marking phase should restart and iterate over the
 bitmap to identify gray objects. Throughout the marking phase,
 tasks attempt to keep the global mark stack at a small length
 but not totally empty, so that entries are available for popping
 by other tasks. Only when there is no more work will tasks
 totally drain the global mark stack.

 (4) Global Region Stack. Entries on it correspond to areas of
 the bitmap that need to be scanned since they contain gray
 objects. Pushes on the region stack only happen during
 evacuation pauses and typically correspond to areas covered by
 GC LABs. If it overflows, then the marking phase should restart
 and iterate over the bitmap to identify gray objects. Tasks will
 try to totally drain the region stack as soon as possible.

 (5) SATB Buffer Queue. This is where completed SATB buffers are
 made available. Buffers are regularly removed from this queue
 and scanned for roots, so that the queue doesn't get too
 long.
 During remark, all completed buffers are processed, as
 well as the filled-in parts of any uncompleted buffers.

 The do_marking_step() method tries to abort when the time target
 has been reached. There are a few other cases when the
 do_marking_step() method also aborts:

 (1) When the marking phase has been aborted (after a Full GC).

 (2) When a global overflow (either on the global stack or the
 region stack) has been triggered. Before the task aborts, it
 will actually sync up with the other tasks to ensure that all
 the marking data structures (local queues, stacks, fingers etc.)
 are re-initialised so that when do_marking_step() completes,
 the marking phase can immediately restart.

 (3) When enough completed SATB buffers are available. The
 do_marking_step() method only tries to drain SATB buffers right
 at the beginning. So, if enough buffers are available, the
 marking step aborts and the SATB buffers are processed at
 the beginning of the next invocation.

 (4) To yield. When we have to yield, we abort and do the yield
 right at the end of do_marking_step(). This saves us from a lot
 of hassle as, by yielding, we might allow a Full GC. If this
 happens, then objects will be compacted underneath our feet, the
 heap might shrink, etc. We avoid having to check for all this by
 just aborting and doing the yield right at the end.

 From the above it follows that the do_marking_step() method should
 be called in a loop (or, otherwise, regularly) until it completes.

 If a marking step completes without its has_aborted() flag being
 true, it means it has completed the current marking phase (and
 also all other marking tasks have done so and have all synced up).

 A method called regular_clock_call() is invoked "regularly" (in
 sub-millisecond intervals) throughout marking. It is this clock
 method that checks all the abort conditions which were mentioned
 above and decides when the task should abort. A work-based scheme
 is used to trigger this clock method: it is called when the number
 of object words the marking phase has scanned or the number of
 references the marking phase has visited reaches a given limit.
 Additional invocations of the clock method have been planted in a
 few other strategic places too. The initial reason for the clock
 method was to avoid reading the virtual time too frequently, as it
 is quite expensive. So, once it was in place, it was natural to
 piggy-back all the other conditions on it too and not constantly
 check them throughout the code.
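 To illustrate the intended usage (a sketch only, with hypothetical
 names; the real call sites live in the concurrent mark thread and
 in the remark task code, and supply their own time targets and
 flags):

   do {
     task->do_marking_step(target_ms, do_stealing, do_termination);
     // ...yield and/or run the overflow protocol here as needed...
   } while (task->has_aborted() && !cm->has_aborted());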

*****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  assert(concurrent() || _cm->region_stack_empty(),
         "the region stack should have been cleared before remark");
  assert(concurrent() || !_cm->has_aborted_regions(),
         "aborted regions should have been cleared before remark");
  assert(_region_finger == NULL,
         "this should be non-null only when a region is being scanned");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the region stack or the mark stack overflows
    // during a GC pause and this task, after a yield point,
    // restarts. We have to abort as we need to get into the overflow
    // protocol which happens right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Then totally drain the region stack. We will not look at
  // it again before the next invocation of this method. Entries on
  // the region stack are only added during evacuation pauses, for
  // which we have to yield. When we do, we abort the task anyway so
  // it will look at the region stack again when it restarts.
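  // Scanning a region popped from the region stack must not move the
  // task's local finger (see CMBitMapClosure::do_bit() above), hence
  // we scan with _scanning_heap_region set to false.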
  bitmap_closure.set_scanning_heap_region(false);
  drain_region_stack(&bitmap_closure);
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);
      }

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      bitmap_closure.set_scanning_heap_region(true);
      if (mr.is_empty() ||
          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task;
      // it will be restarted and we can hopefully find more things
      // to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// It takes as a parameter a pointer to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes = capacity_bytes;
    _hum_used_bytes = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes += used_bytes;
  _total_capacity_bytes += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}