1 /* 2 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/symbolTable.hpp" 27 #include "gc_implementation/g1/concurrentMark.hpp" 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp" 31 #include "gc_implementation/g1/g1RemSet.hpp" 32 #include "gc_implementation/g1/heapRegionRemSet.hpp" 33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp" 34 #include "memory/genOopClosures.inline.hpp" 35 #include "memory/referencePolicy.hpp" 36 #include "memory/resourceArea.hpp" 37 #include "oops/oop.inline.hpp" 38 #include "runtime/handles.inline.hpp" 39 #include "runtime/java.hpp" 40 41 // 42 // CMS Bit Map Wrapper 43 44 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter): 45 _bm((uintptr_t*)NULL,0), 46 _shifter(shifter) { 47 _bmStartWord = (HeapWord*)(rs.base()); 48 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes 49 ReservedSpace brs(ReservedSpace::allocation_align_size_up( 50 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); 51 52 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map"); 53 // For now we'll just commit all of the bit map up fromt. 54 // Later on we'll try to be more parsimonious with swap. 55 guarantee(_virtual_space.initialize(brs, brs.size()), 56 "couldn't reseve backing store for CMS bit map"); 57 assert(_virtual_space.committed_size() == brs.size(), 58 "didn't reserve backing store for all of CMS bit map?"); 59 _bm.set_map((uintptr_t*)_virtual_space.low()); 60 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= 61 _bmWordSize, "inconsistency in bit map sizing"); 62 _bm.set_size(_bmWordSize >> _shifter); 63 } 64 65 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, 66 HeapWord* limit) const { 67 // First we must round addr *up* to a possible object boundary. 68 addr = (HeapWord*)align_size_up((intptr_t)addr, 69 HeapWordSize << _shifter); 70 size_t addrOffset = heapWordToOffset(addr); 71 if (limit == NULL) limit = _bmStartWord + _bmWordSize; 72 size_t limitOffset = heapWordToOffset(limit); 73 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 74 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 75 assert(nextAddr >= addr, "get_next_one postcondition"); 76 assert(nextAddr == limit || isMarked(nextAddr), 77 "get_next_one postcondition"); 78 return nextAddr; 79 } 80 81 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, 82 HeapWord* limit) const { 83 size_t addrOffset = heapWordToOffset(addr); 84 if (limit == NULL) limit = _bmStartWord + _bmWordSize; 85 size_t limitOffset = heapWordToOffset(limit); 86 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); 87 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 88 assert(nextAddr >= addr, "get_next_one postcondition"); 89 assert(nextAddr == limit || !isMarked(nextAddr), 90 "get_next_one postcondition"); 91 return nextAddr; 92 } 93 94 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { 95 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); 96 return (int) (diff >> _shifter); 97 } 98 99 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { 100 HeapWord* left = MAX2(_bmStartWord, mr.start()); 101 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end()); 102 if (right > left) { 103 // Right-open interval [leftOffset, rightOffset). 104 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right)); 105 } else { 106 return true; 107 } 108 } 109 110 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap, 111 size_t from_start_index, 112 HeapWord* to_start_word, 113 size_t word_num) { 114 _bm.mostly_disjoint_range_union(from_bitmap, 115 from_start_index, 116 heapWordToOffset(to_start_word), 117 word_num); 118 } 119 120 #ifndef PRODUCT 121 bool CMBitMapRO::covers(ReservedSpace rs) const { 122 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 123 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize, 124 "size inconsistency"); 125 return _bmStartWord == (HeapWord*)(rs.base()) && 126 _bmWordSize == rs.size()>>LogHeapWordSize; 127 } 128 #endif 129 130 void CMBitMap::clearAll() { 131 _bm.clear(); 132 return; 133 } 134 135 void CMBitMap::markRange(MemRegion mr) { 136 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 137 assert(!mr.is_empty(), "unexpected empty region"); 138 assert((offsetToHeapWord(heapWordToOffset(mr.end())) == 139 ((HeapWord *) mr.end())), 140 "markRange memory region end is not card aligned"); 141 // convert address range into offset range 142 _bm.at_put_range(heapWordToOffset(mr.start()), 143 heapWordToOffset(mr.end()), true); 144 } 145 146 void CMBitMap::clearRange(MemRegion mr) { 147 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 148 assert(!mr.is_empty(), "unexpected empty region"); 149 // convert address range into offset range 150 _bm.at_put_range(heapWordToOffset(mr.start()), 151 heapWordToOffset(mr.end()), false); 152 } 153 154 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, 155 HeapWord* end_addr) { 156 HeapWord* start = getNextMarkedWordAddress(addr); 157 start = MIN2(start, end_addr); 158 HeapWord* end = getNextUnmarkedWordAddress(start); 159 end = MIN2(end, end_addr); 160 assert(start <= end, "Consistency check"); 161 MemRegion mr(start, end); 162 if (!mr.is_empty()) { 163 clearRange(mr); 164 } 165 return mr; 166 } 167 168 CMMarkStack::CMMarkStack(ConcurrentMark* cm) : 169 _base(NULL), _cm(cm) 170 #ifdef ASSERT 171 , _drain_in_progress(false) 172 , _drain_in_progress_yields(false) 173 #endif 174 {} 175 176 void CMMarkStack::allocate(size_t size) { 177 _base = NEW_C_HEAP_ARRAY(oop, size); 178 if (_base == NULL) 179 vm_exit_during_initialization("Failed to allocate " 180 "CM region mark stack"); 181 _index = 0; 182 // QQQQ cast ... 183 _capacity = (jint) size; 184 _oops_do_bound = -1; 185 NOT_PRODUCT(_max_depth = 0); 186 } 187 188 CMMarkStack::~CMMarkStack() { 189 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); 190 } 191 192 void CMMarkStack::par_push(oop ptr) { 193 while (true) { 194 if (isFull()) { 195 _overflow = true; 196 return; 197 } 198 // Otherwise... 199 jint index = _index; 200 jint next_index = index+1; 201 jint res = Atomic::cmpxchg(next_index, &_index, index); 202 if (res == index) { 203 _base[index] = ptr; 204 // Note that we don't maintain this atomically. We could, but it 205 // doesn't seem necessary. 206 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 207 return; 208 } 209 // Otherwise, we need to try again. 210 } 211 } 212 213 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { 214 while (true) { 215 if (isFull()) { 216 _overflow = true; 217 return; 218 } 219 // Otherwise... 220 jint index = _index; 221 jint next_index = index + n; 222 if (next_index > _capacity) { 223 _overflow = true; 224 return; 225 } 226 jint res = Atomic::cmpxchg(next_index, &_index, index); 227 if (res == index) { 228 for (int i = 0; i < n; i++) { 229 int ind = index + i; 230 assert(ind < _capacity, "By overflow test above."); 231 _base[ind] = ptr_arr[i]; 232 } 233 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 234 return; 235 } 236 // Otherwise, we need to try again. 237 } 238 } 239 240 241 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { 242 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 243 jint start = _index; 244 jint next_index = start + n; 245 if (next_index > _capacity) { 246 _overflow = true; 247 return; 248 } 249 // Otherwise. 250 _index = next_index; 251 for (int i = 0; i < n; i++) { 252 int ind = start + i; 253 assert(ind < _capacity, "By overflow test above."); 254 _base[ind] = ptr_arr[i]; 255 } 256 } 257 258 259 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { 260 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 261 jint index = _index; 262 if (index == 0) { 263 *n = 0; 264 return false; 265 } else { 266 int k = MIN2(max, index); 267 jint new_ind = index - k; 268 for (int j = 0; j < k; j++) { 269 ptr_arr[j] = _base[new_ind + j]; 270 } 271 _index = new_ind; 272 *n = k; 273 return true; 274 } 275 } 276 277 278 CMRegionStack::CMRegionStack() : _base(NULL) {} 279 280 void CMRegionStack::allocate(size_t size) { 281 _base = NEW_C_HEAP_ARRAY(MemRegion, size); 282 if (_base == NULL) 283 vm_exit_during_initialization("Failed to allocate " 284 "CM region mark stack"); 285 _index = 0; 286 // QQQQ cast ... 287 _capacity = (jint) size; 288 } 289 290 CMRegionStack::~CMRegionStack() { 291 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); 292 } 293 294 void CMRegionStack::push_lock_free(MemRegion mr) { 295 assert(mr.word_size() > 0, "Precondition"); 296 while (true) { 297 jint index = _index; 298 299 if (index >= _capacity) { 300 _overflow = true; 301 return; 302 } 303 // Otherwise... 304 jint next_index = index+1; 305 jint res = Atomic::cmpxchg(next_index, &_index, index); 306 if (res == index) { 307 _base[index] = mr; 308 return; 309 } 310 // Otherwise, we need to try again. 311 } 312 } 313 314 // Lock-free pop of the region stack. Called during the concurrent 315 // marking / remark phases. Should only be called in tandem with 316 // other lock-free pops. 317 MemRegion CMRegionStack::pop_lock_free() { 318 while (true) { 319 jint index = _index; 320 321 if (index == 0) { 322 return MemRegion(); 323 } 324 // Otherwise... 325 jint next_index = index-1; 326 jint res = Atomic::cmpxchg(next_index, &_index, index); 327 if (res == index) { 328 MemRegion mr = _base[next_index]; 329 if (mr.start() != NULL) { 330 assert(mr.end() != NULL, "invariant"); 331 assert(mr.word_size() > 0, "invariant"); 332 return mr; 333 } else { 334 // that entry was invalidated... let's skip it 335 assert(mr.end() == NULL, "invariant"); 336 } 337 } 338 // Otherwise, we need to try again. 339 } 340 } 341 342 #if 0 343 // The routines that manipulate the region stack with a lock are 344 // not currently used. They should be retained, however, as a 345 // diagnostic aid. 346 347 void CMRegionStack::push_with_lock(MemRegion mr) { 348 assert(mr.word_size() > 0, "Precondition"); 349 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag); 350 351 if (isFull()) { 352 _overflow = true; 353 return; 354 } 355 356 _base[_index] = mr; 357 _index += 1; 358 } 359 360 MemRegion CMRegionStack::pop_with_lock() { 361 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag); 362 363 while (true) { 364 if (_index == 0) { 365 return MemRegion(); 366 } 367 _index -= 1; 368 369 MemRegion mr = _base[_index]; 370 if (mr.start() != NULL) { 371 assert(mr.end() != NULL, "invariant"); 372 assert(mr.word_size() > 0, "invariant"); 373 return mr; 374 } else { 375 // that entry was invalidated... let's skip it 376 assert(mr.end() == NULL, "invariant"); 377 } 378 } 379 } 380 #endif 381 382 bool CMRegionStack::invalidate_entries_into_cset() { 383 bool result = false; 384 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 385 for (int i = 0; i < _oops_do_bound; ++i) { 386 MemRegion mr = _base[i]; 387 if (mr.start() != NULL) { 388 assert(mr.end() != NULL, "invariant"); 389 assert(mr.word_size() > 0, "invariant"); 390 HeapRegion* hr = g1h->heap_region_containing(mr.start()); 391 assert(hr != NULL, "invariant"); 392 if (hr->in_collection_set()) { 393 // The region points into the collection set 394 _base[i] = MemRegion(); 395 result = true; 396 } 397 } else { 398 // that entry was invalidated... let's skip it 399 assert(mr.end() == NULL, "invariant"); 400 } 401 } 402 return result; 403 } 404 405 template<class OopClosureClass> 406 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) { 407 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after 408 || SafepointSynchronize::is_at_safepoint(), 409 "Drain recursion must be yield-safe."); 410 bool res = true; 411 debug_only(_drain_in_progress = true); 412 debug_only(_drain_in_progress_yields = yield_after); 413 while (!isEmpty()) { 414 oop newOop = pop(); 415 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop"); 416 assert(newOop->is_oop(), "Expected an oop"); 417 assert(bm == NULL || bm->isMarked((HeapWord*)newOop), 418 "only grey objects on this stack"); 419 // iterate over the oops in this oop, marking and pushing 420 // the ones in CMS generation. 421 newOop->oop_iterate(cl); 422 if (yield_after && _cm->do_yield_check()) { 423 res = false; break; 424 } 425 } 426 debug_only(_drain_in_progress = false); 427 return res; 428 } 429 430 void CMMarkStack::oops_do(OopClosure* f) { 431 if (_index == 0) return; 432 assert(_oops_do_bound != -1 && _oops_do_bound <= _index, 433 "Bound must be set."); 434 for (int i = 0; i < _oops_do_bound; i++) { 435 f->do_oop(&_base[i]); 436 } 437 _oops_do_bound = -1; 438 } 439 440 bool ConcurrentMark::not_yet_marked(oop obj) const { 441 return (_g1h->is_obj_ill(obj) 442 || (_g1h->is_in_permanent(obj) 443 && !nextMarkBitMap()->isMarked((HeapWord*)obj))); 444 } 445 446 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away 447 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list 448 #endif // _MSC_VER 449 450 ConcurrentMark::ConcurrentMark(ReservedSpace rs, 451 int max_regions) : 452 _markBitMap1(rs, MinObjAlignment - 1), 453 _markBitMap2(rs, MinObjAlignment - 1), 454 455 _parallel_marking_threads(0), 456 _sleep_factor(0.0), 457 _marking_task_overhead(1.0), 458 _cleanup_sleep_factor(0.0), 459 _cleanup_task_overhead(1.0), 460 _region_bm(max_regions, false /* in_resource_area*/), 461 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> 462 CardTableModRefBS::card_shift, 463 false /* in_resource_area*/), 464 _prevMarkBitMap(&_markBitMap1), 465 _nextMarkBitMap(&_markBitMap2), 466 _at_least_one_mark_complete(false), 467 468 _markStack(this), 469 _regionStack(), 470 // _finger set in set_non_marking_state 471 472 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)), 473 // _active_tasks set in set_non_marking_state 474 // _tasks set inside the constructor 475 _task_queues(new CMTaskQueueSet((int) _max_task_num)), 476 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)), 477 478 _has_overflown(false), 479 _concurrent(false), 480 _has_aborted(false), 481 _restart_for_overflow(false), 482 _concurrent_marking_in_progress(false), 483 _should_gray_objects(false), 484 485 // _verbose_level set below 486 487 _init_times(), 488 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 489 _cleanup_times(), 490 _total_counting_time(0.0), 491 _total_rs_scrub_time(0.0), 492 493 _parallel_workers(NULL) 494 { 495 CMVerboseLevel verbose_level = 496 (CMVerboseLevel) G1MarkingVerboseLevel; 497 if (verbose_level < no_verbose) 498 verbose_level = no_verbose; 499 if (verbose_level > high_verbose) 500 verbose_level = high_verbose; 501 _verbose_level = verbose_level; 502 503 if (verbose_low()) 504 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " 505 "heap end = "PTR_FORMAT, _heap_start, _heap_end); 506 507 _markStack.allocate(MarkStackSize); 508 _regionStack.allocate(G1MarkRegionStackSize); 509 510 // Create & start a ConcurrentMark thread. 511 _cmThread = new ConcurrentMarkThread(this); 512 assert(cmThread() != NULL, "CM Thread should have been created"); 513 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 514 515 _g1h = G1CollectedHeap::heap(); 516 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 517 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); 518 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); 519 520 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 521 satb_qs.set_buffer_size(G1SATBBufferSize); 522 523 int size = (int) MAX2(ParallelGCThreads, (size_t)1); 524 _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size); 525 for (int i = 0 ; i < size; i++) { 526 _par_cleanup_thread_state[i] = new ParCleanupThreadState; 527 } 528 529 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); 530 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); 531 532 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 533 _active_tasks = _max_task_num; 534 for (int i = 0; i < (int) _max_task_num; ++i) { 535 CMTaskQueue* task_queue = new CMTaskQueue(); 536 task_queue->initialize(); 537 _task_queues->register_queue(i, task_queue); 538 539 _tasks[i] = new CMTask(i, this, task_queue, _task_queues); 540 _accum_task_vtime[i] = 0.0; 541 } 542 543 if (ConcGCThreads > ParallelGCThreads) { 544 vm_exit_during_initialization("Can't have more ConcGCThreads " 545 "than ParallelGCThreads."); 546 } 547 if (ParallelGCThreads == 0) { 548 // if we are not running with any parallel GC threads we will not 549 // spawn any marking threads either 550 _parallel_marking_threads = 0; 551 _sleep_factor = 0.0; 552 _marking_task_overhead = 1.0; 553 } else { 554 if (ConcGCThreads > 0) { 555 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent 556 // if both are set 557 558 _parallel_marking_threads = ConcGCThreads; 559 _sleep_factor = 0.0; 560 _marking_task_overhead = 1.0; 561 } else if (G1MarkingOverheadPercent > 0) { 562 // we will calculate the number of parallel marking threads 563 // based on a target overhead with respect to the soft real-time 564 // goal 565 566 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 567 double overall_cm_overhead = 568 (double) MaxGCPauseMillis * marking_overhead / 569 (double) GCPauseIntervalMillis; 570 double cpu_ratio = 1.0 / (double) os::processor_count(); 571 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 572 double marking_task_overhead = 573 overall_cm_overhead / marking_thread_num * 574 (double) os::processor_count(); 575 double sleep_factor = 576 (1.0 - marking_task_overhead) / marking_task_overhead; 577 578 _parallel_marking_threads = (size_t) marking_thread_num; 579 _sleep_factor = sleep_factor; 580 _marking_task_overhead = marking_task_overhead; 581 } else { 582 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1); 583 _sleep_factor = 0.0; 584 _marking_task_overhead = 1.0; 585 } 586 587 if (parallel_marking_threads() > 1) 588 _cleanup_task_overhead = 1.0; 589 else 590 _cleanup_task_overhead = marking_task_overhead(); 591 _cleanup_sleep_factor = 592 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); 593 594 #if 0 595 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); 596 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); 597 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); 598 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); 599 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); 600 #endif 601 602 guarantee(parallel_marking_threads() > 0, "peace of mind"); 603 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads", 604 (int) _parallel_marking_threads, false, true); 605 if (_parallel_workers == NULL) { 606 vm_exit_during_initialization("Failed necessary allocation."); 607 } else { 608 _parallel_workers->initialize_workers(); 609 } 610 } 611 612 // so that the call below can read a sensible value 613 _heap_start = (HeapWord*) rs.base(); 614 set_non_marking_state(); 615 } 616 617 void ConcurrentMark::update_g1_committed(bool force) { 618 // If concurrent marking is not in progress, then we do not need to 619 // update _heap_end. This has a subtle and important 620 // side-effect. Imagine that two evacuation pauses happen between 621 // marking completion and remark. The first one can grow the 622 // heap (hence now the finger is below the heap end). Then, the 623 // second one could unnecessarily push regions on the region 624 // stack. This causes the invariant that the region stack is empty 625 // at the beginning of remark to be false. By ensuring that we do 626 // not observe heap expansions after marking is complete, then we do 627 // not have this problem. 628 if (!concurrent_marking_in_progress() && !force) 629 return; 630 631 MemRegion committed = _g1h->g1_committed(); 632 assert(committed.start() == _heap_start, "start shouldn't change"); 633 HeapWord* new_end = committed.end(); 634 if (new_end > _heap_end) { 635 // The heap has been expanded. 636 637 _heap_end = new_end; 638 } 639 // Notice that the heap can also shrink. However, this only happens 640 // during a Full GC (at least currently) and the entire marking 641 // phase will bail out and the task will not be restarted. So, let's 642 // do nothing. 643 } 644 645 void ConcurrentMark::reset() { 646 // Starting values for these two. This should be called in a STW 647 // phase. CM will be notified of any future g1_committed expansions 648 // will be at the end of evacuation pauses, when tasks are 649 // inactive. 650 MemRegion committed = _g1h->g1_committed(); 651 _heap_start = committed.start(); 652 _heap_end = committed.end(); 653 654 // Separated the asserts so that we know which one fires. 655 assert(_heap_start != NULL, "heap bounds should look ok"); 656 assert(_heap_end != NULL, "heap bounds should look ok"); 657 assert(_heap_start < _heap_end, "heap bounds should look ok"); 658 659 // reset all the marking data structures and any necessary flags 660 clear_marking_state(); 661 662 if (verbose_low()) 663 gclog_or_tty->print_cr("[global] resetting"); 664 665 // We do reset all of them, since different phases will use 666 // different number of active threads. So, it's easiest to have all 667 // of them ready. 668 for (int i = 0; i < (int) _max_task_num; ++i) { 669 _tasks[i]->reset(_nextMarkBitMap); 670 } 671 672 // we need this to make sure that the flag is on during the evac 673 // pause with initial mark piggy-backed 674 set_concurrent_marking_in_progress(); 675 } 676 677 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) { 678 assert(active_tasks <= _max_task_num, "we should not have more"); 679 680 _active_tasks = active_tasks; 681 // Need to update the three data structures below according to the 682 // number of active threads for this phase. 683 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 684 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 685 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 686 687 _concurrent = concurrent; 688 // We propagate this to all tasks, not just the active ones. 689 for (int i = 0; i < (int) _max_task_num; ++i) 690 _tasks[i]->set_concurrent(concurrent); 691 692 if (concurrent) { 693 set_concurrent_marking_in_progress(); 694 } else { 695 // We currently assume that the concurrent flag has been set to 696 // false before we start remark. At this point we should also be 697 // in a STW phase. 698 assert(!concurrent_marking_in_progress(), "invariant"); 699 assert(_finger == _heap_end, "only way to get here"); 700 update_g1_committed(true); 701 } 702 } 703 704 void ConcurrentMark::set_non_marking_state() { 705 // We set the global marking state to some default values when we're 706 // not doing marking. 707 clear_marking_state(); 708 _active_tasks = 0; 709 clear_concurrent_marking_in_progress(); 710 } 711 712 ConcurrentMark::~ConcurrentMark() { 713 int size = (int) MAX2(ParallelGCThreads, (size_t)1); 714 for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i]; 715 FREE_C_HEAP_ARRAY(ParCleanupThreadState*, 716 _par_cleanup_thread_state); 717 718 for (int i = 0; i < (int) _max_task_num; ++i) { 719 delete _task_queues->queue(i); 720 delete _tasks[i]; 721 } 722 delete _task_queues; 723 FREE_C_HEAP_ARRAY(CMTask*, _max_task_num); 724 } 725 726 // This closure is used to mark refs into the g1 generation 727 // from external roots in the CMS bit map. 728 // Called at the first checkpoint. 729 // 730 731 void ConcurrentMark::clearNextBitmap() { 732 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 733 G1CollectorPolicy* g1p = g1h->g1_policy(); 734 735 // Make sure that the concurrent mark thread looks to still be in 736 // the current cycle. 737 guarantee(cmThread()->during_cycle(), "invariant"); 738 739 // We are finishing up the current cycle by clearing the next 740 // marking bitmap and getting it ready for the next cycle. During 741 // this time no other cycle can start. So, let's make sure that this 742 // is the case. 743 guarantee(!g1h->mark_in_progress(), "invariant"); 744 745 // clear the mark bitmap (no grey objects to start with). 746 // We need to do this in chunks and offer to yield in between 747 // each chunk. 748 HeapWord* start = _nextMarkBitMap->startWord(); 749 HeapWord* end = _nextMarkBitMap->endWord(); 750 HeapWord* cur = start; 751 size_t chunkSize = M; 752 while (cur < end) { 753 HeapWord* next = cur + chunkSize; 754 if (next > end) 755 next = end; 756 MemRegion mr(cur,next); 757 _nextMarkBitMap->clearRange(mr); 758 cur = next; 759 do_yield_check(); 760 761 // Repeat the asserts from above. We'll do them as asserts here to 762 // minimize their overhead on the product. However, we'll have 763 // them as guarantees at the beginning / end of the bitmap 764 // clearing to get some checking in the product. 765 assert(cmThread()->during_cycle(), "invariant"); 766 assert(!g1h->mark_in_progress(), "invariant"); 767 } 768 769 // Repeat the asserts from above. 770 guarantee(cmThread()->during_cycle(), "invariant"); 771 guarantee(!g1h->mark_in_progress(), "invariant"); 772 } 773 774 class NoteStartOfMarkHRClosure: public HeapRegionClosure { 775 public: 776 bool doHeapRegion(HeapRegion* r) { 777 if (!r->continuesHumongous()) { 778 r->note_start_of_marking(true); 779 } 780 return false; 781 } 782 }; 783 784 void ConcurrentMark::checkpointRootsInitialPre() { 785 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 786 G1CollectorPolicy* g1p = g1h->g1_policy(); 787 788 _has_aborted = false; 789 790 #ifndef PRODUCT 791 if (G1PrintReachableAtInitialMark) { 792 print_reachable("at-cycle-start", 793 true /* use_prev_marking */, true /* all */); 794 } 795 #endif 796 797 // Initialise marking structures. This has to be done in a STW phase. 798 reset(); 799 } 800 801 class CMMarkRootsClosure: public OopsInGenClosure { 802 private: 803 ConcurrentMark* _cm; 804 G1CollectedHeap* _g1h; 805 bool _do_barrier; 806 807 public: 808 CMMarkRootsClosure(ConcurrentMark* cm, 809 G1CollectedHeap* g1h, 810 bool do_barrier) : _cm(cm), _g1h(g1h), 811 _do_barrier(do_barrier) { } 812 813 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 814 virtual void do_oop( oop* p) { do_oop_work(p); } 815 816 template <class T> void do_oop_work(T* p) { 817 T heap_oop = oopDesc::load_heap_oop(p); 818 if (!oopDesc::is_null(heap_oop)) { 819 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 820 assert(obj->is_oop() || obj->mark() == NULL, 821 "expected an oop, possibly with mark word displaced"); 822 HeapWord* addr = (HeapWord*)obj; 823 if (_g1h->is_in_g1_reserved(addr)) { 824 _cm->grayRoot(obj); 825 } 826 } 827 if (_do_barrier) { 828 assert(!_g1h->is_in_g1_reserved(p), 829 "Should be called on external roots"); 830 do_barrier(p); 831 } 832 } 833 }; 834 835 void ConcurrentMark::checkpointRootsInitialPost() { 836 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 837 838 // For each region note start of marking. 839 NoteStartOfMarkHRClosure startcl; 840 g1h->heap_region_iterate(&startcl); 841 842 // Start weak-reference discovery. 843 ReferenceProcessor* rp = g1h->ref_processor(); 844 rp->verify_no_references_recorded(); 845 rp->enable_discovery(); // enable ("weak") refs discovery 846 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 847 848 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 849 // This is the start of the marking cycle, we're expected all 850 // threads to have SATB queues with active set to false. 851 satb_mq_set.set_active_all_threads(true, /* new active value */ 852 false /* expected_active */); 853 854 // update_g1_committed() will be called at the end of an evac pause 855 // when marking is on. So, it's also called at the end of the 856 // initial-mark pause to update the heap end, if the heap expands 857 // during it. No need to call it here. 858 } 859 860 // Checkpoint the roots into this generation from outside 861 // this generation. [Note this initial checkpoint need only 862 // be approximate -- we'll do a catch up phase subsequently.] 863 void ConcurrentMark::checkpointRootsInitial() { 864 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); 865 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 866 867 double start = os::elapsedTime(); 868 869 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 870 g1p->record_concurrent_mark_init_start(); 871 checkpointRootsInitialPre(); 872 873 // YSR: when concurrent precleaning is in place, we'll 874 // need to clear the cached card table here 875 876 ResourceMark rm; 877 HandleMark hm; 878 879 g1h->ensure_parsability(false); 880 g1h->perm_gen()->save_marks(); 881 882 CMMarkRootsClosure notOlder(this, g1h, false); 883 CMMarkRootsClosure older(this, g1h, true); 884 885 g1h->set_marking_started(); 886 g1h->rem_set()->prepare_for_younger_refs_iterate(false); 887 888 g1h->process_strong_roots(true, // activate StrongRootsScope 889 false, // fake perm gen collection 890 SharedHeap::SO_AllClasses, 891 ¬Older, // Regular roots 892 NULL, // do not visit active blobs 893 &older // Perm Gen Roots 894 ); 895 checkpointRootsInitialPost(); 896 897 // Statistics. 898 double end = os::elapsedTime(); 899 _init_times.add((end - start) * 1000.0); 900 901 g1p->record_concurrent_mark_init_end(); 902 } 903 904 /* 905 Notice that in the next two methods, we actually leave the STS 906 during the barrier sync and join it immediately afterwards. If we 907 do not do this, this then the following deadlock can occur: one 908 thread could be in the barrier sync code, waiting for the other 909 thread to also sync up, whereas another one could be trying to 910 yield, while also waiting for the other threads to sync up too. 911 912 Because the thread that does the sync barrier has left the STS, it 913 is possible to be suspended for a Full GC or an evacuation pause 914 could occur. This is actually safe, since the entering the sync 915 barrier is one of the last things do_marking_step() does, and it 916 doesn't manipulate any data structures afterwards. 917 */ 918 919 void ConcurrentMark::enter_first_sync_barrier(int task_num) { 920 if (verbose_low()) 921 gclog_or_tty->print_cr("[%d] entering first barrier", task_num); 922 923 ConcurrentGCThread::stsLeave(); 924 _first_overflow_barrier_sync.enter(); 925 ConcurrentGCThread::stsJoin(); 926 // at this point everyone should have synced up and not be doing any 927 // more work 928 929 if (verbose_low()) 930 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); 931 932 // let task 0 do this 933 if (task_num == 0) { 934 // task 0 is responsible for clearing the global data structures 935 clear_marking_state(); 936 937 if (PrintGC) { 938 gclog_or_tty->date_stamp(PrintGCDateStamps); 939 gclog_or_tty->stamp(PrintGCTimeStamps); 940 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); 941 } 942 } 943 944 // after this, each task should reset its own data structures then 945 // then go into the second barrier 946 } 947 948 void ConcurrentMark::enter_second_sync_barrier(int task_num) { 949 if (verbose_low()) 950 gclog_or_tty->print_cr("[%d] entering second barrier", task_num); 951 952 ConcurrentGCThread::stsLeave(); 953 _second_overflow_barrier_sync.enter(); 954 ConcurrentGCThread::stsJoin(); 955 // at this point everything should be re-initialised and ready to go 956 957 if (verbose_low()) 958 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); 959 } 960 961 void ConcurrentMark::grayRoot(oop p) { 962 HeapWord* addr = (HeapWord*) p; 963 // We can't really check against _heap_start and _heap_end, since it 964 // is possible during an evacuation pause with piggy-backed 965 // initial-mark that the committed space is expanded during the 966 // pause without CM observing this change. So the assertions below 967 // is a bit conservative; but better than nothing. 968 assert(_g1h->g1_committed().contains(addr), 969 "address should be within the heap bounds"); 970 971 if (!_nextMarkBitMap->isMarked(addr)) 972 _nextMarkBitMap->parMark(addr); 973 } 974 975 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { 976 // The objects on the region have already been marked "in bulk" by 977 // the caller. We only need to decide whether to push the region on 978 // the region stack or not. 979 980 if (!concurrent_marking_in_progress() || !_should_gray_objects) 981 // We're done with marking and waiting for remark. We do not need to 982 // push anything else on the region stack. 983 return; 984 985 HeapWord* finger = _finger; 986 987 if (verbose_low()) 988 gclog_or_tty->print_cr("[global] attempting to push " 989 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at " 990 PTR_FORMAT, mr.start(), mr.end(), finger); 991 992 if (mr.start() < finger) { 993 // The finger is always heap region aligned and it is not possible 994 // for mr to span heap regions. 995 assert(mr.end() <= finger, "invariant"); 996 997 // Separated the asserts so that we know which one fires. 998 assert(mr.start() <= mr.end(), 999 "region boundaries should fall within the committed space"); 1000 assert(_heap_start <= mr.start(), 1001 "region boundaries should fall within the committed space"); 1002 assert(mr.end() <= _heap_end, 1003 "region boundaries should fall within the committed space"); 1004 if (verbose_low()) 1005 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") " 1006 "below the finger, pushing it", 1007 mr.start(), mr.end()); 1008 1009 if (!region_stack_push_lock_free(mr)) { 1010 if (verbose_low()) 1011 gclog_or_tty->print_cr("[global] region stack has overflown."); 1012 } 1013 } 1014 } 1015 1016 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) { 1017 // The object is not marked by the caller. We need to at least mark 1018 // it and maybe push in on the stack. 1019 1020 HeapWord* addr = (HeapWord*)p; 1021 if (!_nextMarkBitMap->isMarked(addr)) { 1022 // We definitely need to mark it, irrespective whether we bail out 1023 // because we're done with marking. 1024 if (_nextMarkBitMap->parMark(addr)) { 1025 if (!concurrent_marking_in_progress() || !_should_gray_objects) 1026 // If we're done with concurrent marking and we're waiting for 1027 // remark, then we're not pushing anything on the stack. 1028 return; 1029 1030 // No OrderAccess:store_load() is needed. It is implicit in the 1031 // CAS done in parMark(addr) above 1032 HeapWord* finger = _finger; 1033 1034 if (addr < finger) { 1035 if (!mark_stack_push(oop(addr))) { 1036 if (verbose_low()) 1037 gclog_or_tty->print_cr("[global] global stack overflow " 1038 "during parMark"); 1039 } 1040 } 1041 } 1042 } 1043 } 1044 1045 class CMConcurrentMarkingTask: public AbstractGangTask { 1046 private: 1047 ConcurrentMark* _cm; 1048 ConcurrentMarkThread* _cmt; 1049 1050 public: 1051 void work(int worker_i) { 1052 assert(Thread::current()->is_ConcurrentGC_thread(), 1053 "this should only be done by a conc GC thread"); 1054 1055 double start_vtime = os::elapsedVTime(); 1056 1057 ConcurrentGCThread::stsJoin(); 1058 1059 assert((size_t) worker_i < _cm->active_tasks(), "invariant"); 1060 CMTask* the_task = _cm->task(worker_i); 1061 the_task->record_start_time(); 1062 if (!_cm->has_aborted()) { 1063 do { 1064 double start_vtime_sec = os::elapsedVTime(); 1065 double start_time_sec = os::elapsedTime(); 1066 the_task->do_marking_step(10.0); 1067 double end_time_sec = os::elapsedTime(); 1068 double end_vtime_sec = os::elapsedVTime(); 1069 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 1070 double elapsed_time_sec = end_time_sec - start_time_sec; 1071 _cm->clear_has_overflown(); 1072 1073 bool ret = _cm->do_yield_check(worker_i); 1074 1075 jlong sleep_time_ms; 1076 if (!_cm->has_aborted() && the_task->has_aborted()) { 1077 sleep_time_ms = 1078 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 1079 ConcurrentGCThread::stsLeave(); 1080 os::sleep(Thread::current(), sleep_time_ms, false); 1081 ConcurrentGCThread::stsJoin(); 1082 } 1083 double end_time2_sec = os::elapsedTime(); 1084 double elapsed_time2_sec = end_time2_sec - start_time_sec; 1085 1086 #if 0 1087 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " 1088 "overhead %1.4lf", 1089 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, 1090 the_task->conc_overhead(os::elapsedTime()) * 8.0); 1091 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", 1092 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 1093 #endif 1094 } while (!_cm->has_aborted() && the_task->has_aborted()); 1095 } 1096 the_task->record_end_time(); 1097 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 1098 1099 ConcurrentGCThread::stsLeave(); 1100 1101 double end_vtime = os::elapsedVTime(); 1102 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); 1103 } 1104 1105 CMConcurrentMarkingTask(ConcurrentMark* cm, 1106 ConcurrentMarkThread* cmt) : 1107 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 1108 1109 ~CMConcurrentMarkingTask() { } 1110 }; 1111 1112 void ConcurrentMark::markFromRoots() { 1113 // we might be tempted to assert that: 1114 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1115 // "inconsistent argument?"); 1116 // However that wouldn't be right, because it's possible that 1117 // a safepoint is indeed in progress as a younger generation 1118 // stop-the-world GC happens even as we mark in this generation. 1119 1120 _restart_for_overflow = false; 1121 1122 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true); 1123 1124 CMConcurrentMarkingTask markingTask(this, cmThread()); 1125 if (parallel_marking_threads() > 0) 1126 _parallel_workers->run_task(&markingTask); 1127 else 1128 markingTask.work(0); 1129 print_stats(); 1130 } 1131 1132 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1133 // world is stopped at this checkpoint 1134 assert(SafepointSynchronize::is_at_safepoint(), 1135 "world should be stopped"); 1136 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1137 1138 // If a full collection has happened, we shouldn't do this. 1139 if (has_aborted()) { 1140 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1141 return; 1142 } 1143 1144 if (VerifyDuringGC) { 1145 HandleMark hm; // handle scope 1146 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1147 Universe::heap()->prepare_for_verify(); 1148 Universe::verify(true, false, true); 1149 } 1150 1151 G1CollectorPolicy* g1p = g1h->g1_policy(); 1152 g1p->record_concurrent_mark_remark_start(); 1153 1154 double start = os::elapsedTime(); 1155 1156 checkpointRootsFinalWork(); 1157 1158 double mark_work_end = os::elapsedTime(); 1159 1160 weakRefsWork(clear_all_soft_refs); 1161 1162 if (has_overflown()) { 1163 // Oops. We overflowed. Restart concurrent marking. 1164 _restart_for_overflow = true; 1165 // Clear the flag. We do not need it any more. 1166 clear_has_overflown(); 1167 if (G1TraceMarkStackOverflow) 1168 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1169 } else { 1170 // We're done with marking. 1171 // This is the end of the marking cycle, we're expected all 1172 // threads to have SATB queues with active set to true. 1173 JavaThread::satb_mark_queue_set().set_active_all_threads( 1174 false, /* new active value */ 1175 true /* expected_active */); 1176 1177 if (VerifyDuringGC) { 1178 HandleMark hm; // handle scope 1179 gclog_or_tty->print(" VerifyDuringGC:(after)"); 1180 Universe::heap()->prepare_for_verify(); 1181 Universe::heap()->verify(/* allow_dirty */ true, 1182 /* silent */ false, 1183 /* use_prev_marking */ false); 1184 } 1185 } 1186 1187 #if VERIFY_OBJS_PROCESSED 1188 _scan_obj_cl.objs_processed = 0; 1189 ThreadLocalObjQueue::objs_enqueued = 0; 1190 #endif 1191 1192 // Statistics 1193 double now = os::elapsedTime(); 1194 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1195 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1196 _remark_times.add((now - start) * 1000.0); 1197 1198 g1p->record_concurrent_mark_remark_end(); 1199 } 1200 1201 1202 #define CARD_BM_TEST_MODE 0 1203 1204 class CalcLiveObjectsClosure: public HeapRegionClosure { 1205 1206 CMBitMapRO* _bm; 1207 ConcurrentMark* _cm; 1208 bool _changed; 1209 bool _yield; 1210 size_t _words_done; 1211 size_t _tot_live; 1212 size_t _tot_used; 1213 size_t _regions_done; 1214 double _start_vtime_sec; 1215 1216 BitMap* _region_bm; 1217 BitMap* _card_bm; 1218 intptr_t _bottom_card_num; 1219 bool _final; 1220 1221 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { 1222 for (intptr_t i = start_card_num; i <= last_card_num; i++) { 1223 #if CARD_BM_TEST_MODE 1224 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set."); 1225 #else 1226 _card_bm->par_at_put(i - _bottom_card_num, 1); 1227 #endif 1228 } 1229 } 1230 1231 public: 1232 CalcLiveObjectsClosure(bool final, 1233 CMBitMapRO *bm, ConcurrentMark *cm, 1234 BitMap* region_bm, BitMap* card_bm) : 1235 _bm(bm), _cm(cm), _changed(false), _yield(true), 1236 _words_done(0), _tot_live(0), _tot_used(0), 1237 _region_bm(region_bm), _card_bm(card_bm),_final(final), 1238 _regions_done(0), _start_vtime_sec(0.0) 1239 { 1240 _bottom_card_num = 1241 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> 1242 CardTableModRefBS::card_shift); 1243 } 1244 1245 // It takes a region that's not empty (i.e., it has at least one 1246 // live object in it and sets its corresponding bit on the region 1247 // bitmap to 1. If the region is "starts humongous" it will also set 1248 // to 1 the bits on the region bitmap that correspond to its 1249 // associated "continues humongous" regions. 1250 void set_bit_for_region(HeapRegion* hr) { 1251 assert(!hr->continuesHumongous(), "should have filtered those out"); 1252 1253 size_t index = hr->hrs_index(); 1254 if (!hr->startsHumongous()) { 1255 // Normal (non-humongous) case: just set the bit. 1256 _region_bm->par_at_put((BitMap::idx_t) index, true); 1257 } else { 1258 // Starts humongous case: calculate how many regions are part of 1259 // this humongous region and then set the bit range. It might 1260 // have been a bit more efficient to look at the object that 1261 // spans these humongous regions to calculate their number from 1262 // the object's size. However, it's a good idea to calculate 1263 // this based on the metadata itself, and not the region 1264 // contents, so that this code is not aware of what goes into 1265 // the humongous regions (in case this changes in the future). 1266 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1267 size_t end_index = index + 1; 1268 while (end_index < g1h->n_regions()) { 1269 HeapRegion* chr = g1h->region_at(end_index); 1270 if (!chr->continuesHumongous()) { 1271 break; 1272 } 1273 end_index += 1; 1274 } 1275 _region_bm->par_at_put_range((BitMap::idx_t) index, 1276 (BitMap::idx_t) end_index, true); 1277 } 1278 } 1279 1280 bool doHeapRegion(HeapRegion* hr) { 1281 if (!_final && _regions_done == 0) 1282 _start_vtime_sec = os::elapsedVTime(); 1283 1284 if (hr->continuesHumongous()) { 1285 // We will ignore these here and process them when their 1286 // associated "starts humongous" region is processed (see 1287 // set_bit_for_heap_region()). Note that we cannot rely on their 1288 // associated "starts humongous" region to have their bit set to 1289 // 1 since, due to the region chunking in the parallel region 1290 // iteration, a "continues humongous" region might be visited 1291 // before its associated "starts humongous". 1292 return false; 1293 } 1294 1295 HeapWord* nextTop = hr->next_top_at_mark_start(); 1296 HeapWord* start = hr->top_at_conc_mark_count(); 1297 assert(hr->bottom() <= start && start <= hr->end() && 1298 hr->bottom() <= nextTop && nextTop <= hr->end() && 1299 start <= nextTop, 1300 "Preconditions."); 1301 // Otherwise, record the number of word's we'll examine. 1302 size_t words_done = (nextTop - start); 1303 // Find the first marked object at or after "start". 1304 start = _bm->getNextMarkedWordAddress(start, nextTop); 1305 size_t marked_bytes = 0; 1306 1307 // Below, the term "card num" means the result of shifting an address 1308 // by the card shift -- address 0 corresponds to card number 0. One 1309 // must subtract the card num of the bottom of the heap to obtain a 1310 // card table index. 1311 // The first card num of the sequence of live cards currently being 1312 // constructed. -1 ==> no sequence. 1313 intptr_t start_card_num = -1; 1314 // The last card num of the sequence of live cards currently being 1315 // constructed. -1 ==> no sequence. 1316 intptr_t last_card_num = -1; 1317 1318 while (start < nextTop) { 1319 if (_yield && _cm->do_yield_check()) { 1320 // We yielded. It might be for a full collection, in which case 1321 // all bets are off; terminate the traversal. 1322 if (_cm->has_aborted()) { 1323 _changed = false; 1324 return true; 1325 } else { 1326 // Otherwise, it might be a collection pause, and the region 1327 // we're looking at might be in the collection set. We'll 1328 // abandon this region. 1329 return false; 1330 } 1331 } 1332 oop obj = oop(start); 1333 int obj_sz = obj->size(); 1334 // The card num of the start of the current object. 1335 intptr_t obj_card_num = 1336 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); 1337 1338 HeapWord* obj_last = start + obj_sz - 1; 1339 intptr_t obj_last_card_num = 1340 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); 1341 1342 if (obj_card_num != last_card_num) { 1343 if (start_card_num == -1) { 1344 assert(last_card_num == -1, "Both or neither."); 1345 start_card_num = obj_card_num; 1346 } else { 1347 assert(last_card_num != -1, "Both or neither."); 1348 assert(obj_card_num >= last_card_num, "Inv"); 1349 if ((obj_card_num - last_card_num) > 1) { 1350 // Mark the last run, and start a new one. 1351 mark_card_num_range(start_card_num, last_card_num); 1352 start_card_num = obj_card_num; 1353 } 1354 } 1355 #if CARD_BM_TEST_MODE 1356 /* 1357 gclog_or_tty->print_cr("Setting bits from %d/%d.", 1358 obj_card_num - _bottom_card_num, 1359 obj_last_card_num - _bottom_card_num); 1360 */ 1361 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { 1362 _card_bm->par_at_put(j - _bottom_card_num, 1); 1363 } 1364 #endif 1365 } 1366 // In any case, we set the last card num. 1367 last_card_num = obj_last_card_num; 1368 1369 marked_bytes += (size_t)obj_sz * HeapWordSize; 1370 // Find the next marked object after this one. 1371 start = _bm->getNextMarkedWordAddress(start + 1, nextTop); 1372 _changed = true; 1373 } 1374 // Handle the last range, if any. 1375 if (start_card_num != -1) 1376 mark_card_num_range(start_card_num, last_card_num); 1377 if (_final) { 1378 // Mark the allocated-since-marking portion... 1379 HeapWord* tp = hr->top(); 1380 if (nextTop < tp) { 1381 start_card_num = 1382 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); 1383 last_card_num = 1384 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); 1385 mark_card_num_range(start_card_num, last_card_num); 1386 // This definitely means the region has live objects. 1387 set_bit_for_region(hr); 1388 } 1389 } 1390 1391 hr->add_to_marked_bytes(marked_bytes); 1392 // Update the live region bitmap. 1393 if (marked_bytes > 0) { 1394 set_bit_for_region(hr); 1395 } 1396 hr->set_top_at_conc_mark_count(nextTop); 1397 _tot_live += hr->next_live_bytes(); 1398 _tot_used += hr->used(); 1399 _words_done = words_done; 1400 1401 if (!_final) { 1402 ++_regions_done; 1403 if (_regions_done % 10 == 0) { 1404 double end_vtime_sec = os::elapsedVTime(); 1405 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; 1406 if (elapsed_vtime_sec > (10.0 / 1000.0)) { 1407 jlong sleep_time_ms = 1408 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); 1409 os::sleep(Thread::current(), sleep_time_ms, false); 1410 _start_vtime_sec = end_vtime_sec; 1411 } 1412 } 1413 } 1414 1415 return false; 1416 } 1417 1418 bool changed() { return _changed; } 1419 void reset() { _changed = false; _words_done = 0; } 1420 void no_yield() { _yield = false; } 1421 size_t words_done() { return _words_done; } 1422 size_t tot_live() { return _tot_live; } 1423 size_t tot_used() { return _tot_used; } 1424 }; 1425 1426 1427 void ConcurrentMark::calcDesiredRegions() { 1428 _region_bm.clear(); 1429 _card_bm.clear(); 1430 CalcLiveObjectsClosure calccl(false /*final*/, 1431 nextMarkBitMap(), this, 1432 &_region_bm, &_card_bm); 1433 G1CollectedHeap *g1h = G1CollectedHeap::heap(); 1434 g1h->heap_region_iterate(&calccl); 1435 1436 do { 1437 calccl.reset(); 1438 g1h->heap_region_iterate(&calccl); 1439 } while (calccl.changed()); 1440 } 1441 1442 class G1ParFinalCountTask: public AbstractGangTask { 1443 protected: 1444 G1CollectedHeap* _g1h; 1445 CMBitMap* _bm; 1446 size_t _n_workers; 1447 size_t *_live_bytes; 1448 size_t *_used_bytes; 1449 BitMap* _region_bm; 1450 BitMap* _card_bm; 1451 public: 1452 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, 1453 BitMap* region_bm, BitMap* card_bm) : 1454 AbstractGangTask("G1 final counting"), _g1h(g1h), 1455 _bm(bm), _region_bm(region_bm), _card_bm(card_bm) 1456 { 1457 if (ParallelGCThreads > 0) 1458 _n_workers = _g1h->workers()->total_workers(); 1459 else 1460 _n_workers = 1; 1461 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); 1462 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); 1463 } 1464 1465 ~G1ParFinalCountTask() { 1466 FREE_C_HEAP_ARRAY(size_t, _live_bytes); 1467 FREE_C_HEAP_ARRAY(size_t, _used_bytes); 1468 } 1469 1470 void work(int i) { 1471 CalcLiveObjectsClosure calccl(true /*final*/, 1472 _bm, _g1h->concurrent_mark(), 1473 _region_bm, _card_bm); 1474 calccl.no_yield(); 1475 if (G1CollectedHeap::use_parallel_gc_threads()) { 1476 _g1h->heap_region_par_iterate_chunked(&calccl, i, 1477 HeapRegion::FinalCountClaimValue); 1478 } else { 1479 _g1h->heap_region_iterate(&calccl); 1480 } 1481 assert(calccl.complete(), "Shouldn't have yielded!"); 1482 1483 assert((size_t) i < _n_workers, "invariant"); 1484 _live_bytes[i] = calccl.tot_live(); 1485 _used_bytes[i] = calccl.tot_used(); 1486 } 1487 size_t live_bytes() { 1488 size_t live_bytes = 0; 1489 for (size_t i = 0; i < _n_workers; ++i) 1490 live_bytes += _live_bytes[i]; 1491 return live_bytes; 1492 } 1493 size_t used_bytes() { 1494 size_t used_bytes = 0; 1495 for (size_t i = 0; i < _n_workers; ++i) 1496 used_bytes += _used_bytes[i]; 1497 return used_bytes; 1498 } 1499 }; 1500 1501 class G1ParNoteEndTask; 1502 1503 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1504 G1CollectedHeap* _g1; 1505 int _worker_num; 1506 size_t _max_live_bytes; 1507 size_t _regions_claimed; 1508 size_t _freed_bytes; 1509 size_t _cleared_h_regions; 1510 size_t _freed_regions; 1511 UncleanRegionList* _unclean_region_list; 1512 double _claimed_region_time; 1513 double _max_region_time; 1514 1515 public: 1516 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1517 UncleanRegionList* list, 1518 int worker_num); 1519 size_t freed_bytes() { return _freed_bytes; } 1520 size_t cleared_h_regions() { return _cleared_h_regions; } 1521 size_t freed_regions() { return _freed_regions; } 1522 UncleanRegionList* unclean_region_list() { 1523 return _unclean_region_list; 1524 } 1525 1526 bool doHeapRegion(HeapRegion *r); 1527 1528 size_t max_live_bytes() { return _max_live_bytes; } 1529 size_t regions_claimed() { return _regions_claimed; } 1530 double claimed_region_time_sec() { return _claimed_region_time; } 1531 double max_region_time_sec() { return _max_region_time; } 1532 }; 1533 1534 class G1ParNoteEndTask: public AbstractGangTask { 1535 friend class G1NoteEndOfConcMarkClosure; 1536 protected: 1537 G1CollectedHeap* _g1h; 1538 size_t _max_live_bytes; 1539 size_t _freed_bytes; 1540 ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state; 1541 public: 1542 G1ParNoteEndTask(G1CollectedHeap* g1h, 1543 ConcurrentMark::ParCleanupThreadState** 1544 par_cleanup_thread_state) : 1545 AbstractGangTask("G1 note end"), _g1h(g1h), 1546 _max_live_bytes(0), _freed_bytes(0), 1547 _par_cleanup_thread_state(par_cleanup_thread_state) 1548 {} 1549 1550 void work(int i) { 1551 double start = os::elapsedTime(); 1552 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, 1553 &_par_cleanup_thread_state[i]->list, 1554 i); 1555 if (G1CollectedHeap::use_parallel_gc_threads()) { 1556 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, 1557 HeapRegion::NoteEndClaimValue); 1558 } else { 1559 _g1h->heap_region_iterate(&g1_note_end); 1560 } 1561 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1562 1563 // Now finish up freeing the current thread's regions. 1564 _g1h->finish_free_region_work(g1_note_end.freed_bytes(), 1565 g1_note_end.cleared_h_regions(), 1566 0, NULL); 1567 { 1568 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1569 _max_live_bytes += g1_note_end.max_live_bytes(); 1570 _freed_bytes += g1_note_end.freed_bytes(); 1571 } 1572 double end = os::elapsedTime(); 1573 if (G1PrintParCleanupStats) { 1574 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " 1575 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", 1576 i, start, end, (end-start)*1000.0, 1577 g1_note_end.regions_claimed(), 1578 g1_note_end.claimed_region_time_sec()*1000.0, 1579 g1_note_end.max_region_time_sec()*1000.0); 1580 } 1581 } 1582 size_t max_live_bytes() { return _max_live_bytes; } 1583 size_t freed_bytes() { return _freed_bytes; } 1584 }; 1585 1586 class G1ParScrubRemSetTask: public AbstractGangTask { 1587 protected: 1588 G1RemSet* _g1rs; 1589 BitMap* _region_bm; 1590 BitMap* _card_bm; 1591 public: 1592 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1593 BitMap* region_bm, BitMap* card_bm) : 1594 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1595 _region_bm(region_bm), _card_bm(card_bm) 1596 {} 1597 1598 void work(int i) { 1599 if (G1CollectedHeap::use_parallel_gc_threads()) { 1600 _g1rs->scrub_par(_region_bm, _card_bm, i, 1601 HeapRegion::ScrubRemSetClaimValue); 1602 } else { 1603 _g1rs->scrub(_region_bm, _card_bm); 1604 } 1605 } 1606 1607 }; 1608 1609 G1NoteEndOfConcMarkClosure:: 1610 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1611 UncleanRegionList* list, 1612 int worker_num) 1613 : _g1(g1), _worker_num(worker_num), 1614 _max_live_bytes(0), _regions_claimed(0), 1615 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0), 1616 _claimed_region_time(0.0), _max_region_time(0.0), 1617 _unclean_region_list(list) 1618 {} 1619 1620 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) { 1621 // We use a claim value of zero here because all regions 1622 // were claimed with value 1 in the FinalCount task. 1623 r->reset_gc_time_stamp(); 1624 if (!r->continuesHumongous()) { 1625 double start = os::elapsedTime(); 1626 _regions_claimed++; 1627 r->note_end_of_marking(); 1628 _max_live_bytes += r->max_live_bytes(); 1629 _g1->free_region_if_totally_empty_work(r, 1630 _freed_bytes, 1631 _cleared_h_regions, 1632 _freed_regions, 1633 _unclean_region_list, 1634 true /*par*/); 1635 double region_time = (os::elapsedTime() - start); 1636 _claimed_region_time += region_time; 1637 if (region_time > _max_region_time) _max_region_time = region_time; 1638 } 1639 return false; 1640 } 1641 1642 void ConcurrentMark::cleanup() { 1643 // world is stopped at this checkpoint 1644 assert(SafepointSynchronize::is_at_safepoint(), 1645 "world should be stopped"); 1646 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1647 1648 // If a full collection has happened, we shouldn't do this. 1649 if (has_aborted()) { 1650 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1651 return; 1652 } 1653 1654 if (VerifyDuringGC) { 1655 HandleMark hm; // handle scope 1656 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1657 Universe::heap()->prepare_for_verify(); 1658 Universe::verify(/* allow dirty */ true, 1659 /* silent */ false, 1660 /* prev marking */ true); 1661 } 1662 1663 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1664 g1p->record_concurrent_mark_cleanup_start(); 1665 1666 double start = os::elapsedTime(); 1667 1668 // Do counting once more with the world stopped for good measure. 1669 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), 1670 &_region_bm, &_card_bm); 1671 if (G1CollectedHeap::use_parallel_gc_threads()) { 1672 assert(g1h->check_heap_region_claim_values( 1673 HeapRegion::InitialClaimValue), 1674 "sanity check"); 1675 1676 int n_workers = g1h->workers()->total_workers(); 1677 g1h->set_par_threads(n_workers); 1678 g1h->workers()->run_task(&g1_par_count_task); 1679 g1h->set_par_threads(0); 1680 1681 assert(g1h->check_heap_region_claim_values( 1682 HeapRegion::FinalCountClaimValue), 1683 "sanity check"); 1684 } else { 1685 g1_par_count_task.work(0); 1686 } 1687 1688 size_t known_garbage_bytes = 1689 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); 1690 #if 0 1691 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", 1692 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), 1693 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), 1694 (double) known_garbage_bytes / (double) (1024 * 1024)); 1695 #endif // 0 1696 g1p->set_known_garbage_bytes(known_garbage_bytes); 1697 1698 size_t start_used_bytes = g1h->used(); 1699 _at_least_one_mark_complete = true; 1700 g1h->set_marking_complete(); 1701 1702 double count_end = os::elapsedTime(); 1703 double this_final_counting_time = (count_end - start); 1704 if (G1PrintParCleanupStats) { 1705 gclog_or_tty->print_cr("Cleanup:"); 1706 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", 1707 this_final_counting_time*1000.0); 1708 } 1709 _total_counting_time += this_final_counting_time; 1710 1711 // Install newly created mark bitMap as "prev". 1712 swapMarkBitMaps(); 1713 1714 g1h->reset_gc_time_stamp(); 1715 1716 // Note end of marking in all heap regions. 1717 double note_end_start = os::elapsedTime(); 1718 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); 1719 if (G1CollectedHeap::use_parallel_gc_threads()) { 1720 int n_workers = g1h->workers()->total_workers(); 1721 g1h->set_par_threads(n_workers); 1722 g1h->workers()->run_task(&g1_par_note_end_task); 1723 g1h->set_par_threads(0); 1724 1725 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 1726 "sanity check"); 1727 } else { 1728 g1_par_note_end_task.work(0); 1729 } 1730 g1h->set_unclean_regions_coming(true); 1731 double note_end_end = os::elapsedTime(); 1732 // Tell the mutators that there might be unclean regions coming... 1733 if (G1PrintParCleanupStats) { 1734 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", 1735 (note_end_end - note_end_start)*1000.0); 1736 } 1737 1738 1739 // call below, since it affects the metric by which we sort the heap 1740 // regions. 1741 if (G1ScrubRemSets) { 1742 double rs_scrub_start = os::elapsedTime(); 1743 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 1744 if (G1CollectedHeap::use_parallel_gc_threads()) { 1745 int n_workers = g1h->workers()->total_workers(); 1746 g1h->set_par_threads(n_workers); 1747 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1748 g1h->set_par_threads(0); 1749 1750 assert(g1h->check_heap_region_claim_values( 1751 HeapRegion::ScrubRemSetClaimValue), 1752 "sanity check"); 1753 } else { 1754 g1_par_scrub_rs_task.work(0); 1755 } 1756 1757 double rs_scrub_end = os::elapsedTime(); 1758 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1759 _total_rs_scrub_time += this_rs_scrub_time; 1760 } 1761 1762 // this will also free any regions totally full of garbage objects, 1763 // and sort the regions. 1764 g1h->g1_policy()->record_concurrent_mark_cleanup_end( 1765 g1_par_note_end_task.freed_bytes(), 1766 g1_par_note_end_task.max_live_bytes()); 1767 1768 // Statistics. 1769 double end = os::elapsedTime(); 1770 _cleanup_times.add((end - start) * 1000.0); 1771 1772 // G1CollectedHeap::heap()->print(); 1773 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d", 1774 // G1CollectedHeap::heap()->get_gc_time_stamp()); 1775 1776 if (PrintGC || PrintGCDetails) { 1777 g1h->print_size_transition(gclog_or_tty, 1778 start_used_bytes, 1779 g1h->used(), 1780 g1h->capacity()); 1781 } 1782 1783 size_t cleaned_up_bytes = start_used_bytes - g1h->used(); 1784 g1p->decrease_known_garbage_bytes(cleaned_up_bytes); 1785 1786 // We need to make this be a "collection" so any collection pause that 1787 // races with it goes around and waits for completeCleanup to finish. 1788 g1h->increment_total_collections(); 1789 1790 if (VerifyDuringGC) { 1791 HandleMark hm; // handle scope 1792 gclog_or_tty->print(" VerifyDuringGC:(after)"); 1793 Universe::heap()->prepare_for_verify(); 1794 Universe::verify(/* allow dirty */ true, 1795 /* silent */ false, 1796 /* prev marking */ true); 1797 } 1798 } 1799 1800 void ConcurrentMark::completeCleanup() { 1801 // A full collection intervened. 1802 if (has_aborted()) return; 1803 1804 int first = 0; 1805 int last = (int)MAX2(ParallelGCThreads, (size_t)1); 1806 for (int t = 0; t < last; t++) { 1807 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list; 1808 assert(list->well_formed(), "Inv"); 1809 HeapRegion* hd = list->hd(); 1810 while (hd != NULL) { 1811 // Now finish up the other stuff. 1812 hd->rem_set()->clear(); 1813 HeapRegion* next_hd = hd->next_from_unclean_list(); 1814 (void)list->pop(); 1815 assert(list->hd() == next_hd, "how not?"); 1816 _g1h->put_region_on_unclean_list(hd); 1817 if (!hd->isHumongous()) { 1818 // Add this to the _free_regions count by 1. 1819 _g1h->finish_free_region_work(0, 0, 1, NULL); 1820 } 1821 hd = list->hd(); 1822 assert(hd == next_hd, "how not?"); 1823 } 1824 } 1825 } 1826 1827 1828 class G1CMIsAliveClosure: public BoolObjectClosure { 1829 G1CollectedHeap* _g1; 1830 public: 1831 G1CMIsAliveClosure(G1CollectedHeap* g1) : 1832 _g1(g1) 1833 {} 1834 1835 void do_object(oop obj) { 1836 assert(false, "not to be invoked"); 1837 } 1838 bool do_object_b(oop obj) { 1839 HeapWord* addr = (HeapWord*)obj; 1840 return addr != NULL && 1841 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1842 } 1843 }; 1844 1845 class G1CMKeepAliveClosure: public OopClosure { 1846 G1CollectedHeap* _g1; 1847 ConcurrentMark* _cm; 1848 CMBitMap* _bitMap; 1849 public: 1850 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, 1851 CMBitMap* bitMap) : 1852 _g1(g1), _cm(cm), 1853 _bitMap(bitMap) {} 1854 1855 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1856 virtual void do_oop( oop* p) { do_oop_work(p); } 1857 1858 template <class T> void do_oop_work(T* p) { 1859 oop thisOop = oopDesc::load_decode_heap_oop(p); 1860 HeapWord* addr = (HeapWord*)thisOop; 1861 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) { 1862 _bitMap->mark(addr); 1863 _cm->mark_stack_push(thisOop); 1864 } 1865 } 1866 }; 1867 1868 class G1CMDrainMarkingStackClosure: public VoidClosure { 1869 CMMarkStack* _markStack; 1870 CMBitMap* _bitMap; 1871 G1CMKeepAliveClosure* _oopClosure; 1872 public: 1873 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, 1874 G1CMKeepAliveClosure* oopClosure) : 1875 _bitMap(bitMap), 1876 _markStack(markStack), 1877 _oopClosure(oopClosure) 1878 {} 1879 1880 void do_void() { 1881 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); 1882 } 1883 }; 1884 1885 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1886 ResourceMark rm; 1887 HandleMark hm; 1888 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1889 ReferenceProcessor* rp = g1h->ref_processor(); 1890 1891 // Process weak references. 1892 rp->setup_policy(clear_all_soft_refs); 1893 assert(_markStack.isEmpty(), "mark stack should be empty"); 1894 1895 G1CMIsAliveClosure g1IsAliveClosure (g1h); 1896 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap()); 1897 G1CMDrainMarkingStackClosure 1898 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack, 1899 &g1KeepAliveClosure); 1900 1901 // XXXYYY Also: copy the parallel ref processing code from CMS. 1902 rp->process_discovered_references(&g1IsAliveClosure, 1903 &g1KeepAliveClosure, 1904 &g1DrainMarkingStackClosure, 1905 NULL); 1906 assert(_markStack.overflow() || _markStack.isEmpty(), 1907 "mark stack should be empty (unless it overflowed)"); 1908 if (_markStack.overflow()) { 1909 set_has_overflown(); 1910 } 1911 1912 rp->enqueue_discovered_references(); 1913 rp->verify_no_references_recorded(); 1914 assert(!rp->discovery_enabled(), "should have been disabled"); 1915 1916 // Now clean up stale oops in SymbolTable and StringTable 1917 SymbolTable::unlink(&g1IsAliveClosure); 1918 StringTable::unlink(&g1IsAliveClosure); 1919 } 1920 1921 void ConcurrentMark::swapMarkBitMaps() { 1922 CMBitMapRO* temp = _prevMarkBitMap; 1923 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 1924 _nextMarkBitMap = (CMBitMap*) temp; 1925 } 1926 1927 class CMRemarkTask: public AbstractGangTask { 1928 private: 1929 ConcurrentMark *_cm; 1930 1931 public: 1932 void work(int worker_i) { 1933 // Since all available tasks are actually started, we should 1934 // only proceed if we're supposed to be actived. 1935 if ((size_t)worker_i < _cm->active_tasks()) { 1936 CMTask* task = _cm->task(worker_i); 1937 task->record_start_time(); 1938 do { 1939 task->do_marking_step(1000000000.0 /* something very large */); 1940 } while (task->has_aborted() && !_cm->has_overflown()); 1941 // If we overflow, then we do not want to restart. We instead 1942 // want to abort remark and do concurrent marking again. 1943 task->record_end_time(); 1944 } 1945 } 1946 1947 CMRemarkTask(ConcurrentMark* cm) : 1948 AbstractGangTask("Par Remark"), _cm(cm) { } 1949 }; 1950 1951 void ConcurrentMark::checkpointRootsFinalWork() { 1952 ResourceMark rm; 1953 HandleMark hm; 1954 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1955 1956 g1h->ensure_parsability(false); 1957 1958 if (G1CollectedHeap::use_parallel_gc_threads()) { 1959 G1CollectedHeap::StrongRootsScope srs(g1h); 1960 // this is remark, so we'll use up all available threads 1961 int active_workers = ParallelGCThreads; 1962 set_phase(active_workers, false); 1963 1964 CMRemarkTask remarkTask(this); 1965 // We will start all available threads, even if we decide that the 1966 // active_workers will be fewer. The extra ones will just bail out 1967 // immediately. 1968 int n_workers = g1h->workers()->total_workers(); 1969 g1h->set_par_threads(n_workers); 1970 g1h->workers()->run_task(&remarkTask); 1971 g1h->set_par_threads(0); 1972 } else { 1973 G1CollectedHeap::StrongRootsScope srs(g1h); 1974 // this is remark, so we'll use up all available threads 1975 int active_workers = 1; 1976 set_phase(active_workers, false); 1977 1978 CMRemarkTask remarkTask(this); 1979 // We will start all available threads, even if we decide that the 1980 // active_workers will be fewer. The extra ones will just bail out 1981 // immediately. 1982 remarkTask.work(0); 1983 } 1984 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1985 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 1986 1987 print_stats(); 1988 1989 if (!restart_for_overflow()) 1990 set_non_marking_state(); 1991 1992 #if VERIFY_OBJS_PROCESSED 1993 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 1994 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 1995 _scan_obj_cl.objs_processed, 1996 ThreadLocalObjQueue::objs_enqueued); 1997 guarantee(_scan_obj_cl.objs_processed == 1998 ThreadLocalObjQueue::objs_enqueued, 1999 "Different number of objs processed and enqueued."); 2000 } 2001 #endif 2002 } 2003 2004 #ifndef PRODUCT 2005 2006 class PrintReachableOopClosure: public OopClosure { 2007 private: 2008 G1CollectedHeap* _g1h; 2009 CMBitMapRO* _bitmap; 2010 outputStream* _out; 2011 bool _use_prev_marking; 2012 bool _all; 2013 2014 public: 2015 PrintReachableOopClosure(CMBitMapRO* bitmap, 2016 outputStream* out, 2017 bool use_prev_marking, 2018 bool all) : 2019 _g1h(G1CollectedHeap::heap()), 2020 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { } 2021 2022 void do_oop(narrowOop* p) { do_oop_work(p); } 2023 void do_oop( oop* p) { do_oop_work(p); } 2024 2025 template <class T> void do_oop_work(T* p) { 2026 oop obj = oopDesc::load_decode_heap_oop(p); 2027 const char* str = NULL; 2028 const char* str2 = ""; 2029 2030 if (obj == NULL) { 2031 str = ""; 2032 } else if (!_g1h->is_in_g1_reserved(obj)) { 2033 str = " O"; 2034 } else { 2035 HeapRegion* hr = _g1h->heap_region_containing(obj); 2036 guarantee(hr != NULL, "invariant"); 2037 bool over_tams = false; 2038 if (_use_prev_marking) { 2039 over_tams = hr->obj_allocated_since_prev_marking(obj); 2040 } else { 2041 over_tams = hr->obj_allocated_since_next_marking(obj); 2042 } 2043 bool marked = _bitmap->isMarked((HeapWord*) obj); 2044 2045 if (over_tams) { 2046 str = " >"; 2047 if (marked) { 2048 str2 = " AND MARKED"; 2049 } 2050 } else if (marked) { 2051 str = " M"; 2052 } else { 2053 str = " NOT"; 2054 } 2055 } 2056 2057 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2058 p, (void*) obj, str, str2); 2059 } 2060 }; 2061 2062 class PrintReachableObjectClosure : public ObjectClosure { 2063 private: 2064 CMBitMapRO* _bitmap; 2065 outputStream* _out; 2066 bool _use_prev_marking; 2067 bool _all; 2068 HeapRegion* _hr; 2069 2070 public: 2071 PrintReachableObjectClosure(CMBitMapRO* bitmap, 2072 outputStream* out, 2073 bool use_prev_marking, 2074 bool all, 2075 HeapRegion* hr) : 2076 _bitmap(bitmap), _out(out), 2077 _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { } 2078 2079 void do_object(oop o) { 2080 bool over_tams; 2081 if (_use_prev_marking) { 2082 over_tams = _hr->obj_allocated_since_prev_marking(o); 2083 } else { 2084 over_tams = _hr->obj_allocated_since_next_marking(o); 2085 } 2086 bool marked = _bitmap->isMarked((HeapWord*) o); 2087 bool print_it = _all || over_tams || marked; 2088 2089 if (print_it) { 2090 _out->print_cr(" "PTR_FORMAT"%s", 2091 o, (over_tams) ? " >" : (marked) ? " M" : ""); 2092 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all); 2093 o->oop_iterate(&oopCl); 2094 } 2095 } 2096 }; 2097 2098 class PrintReachableRegionClosure : public HeapRegionClosure { 2099 private: 2100 CMBitMapRO* _bitmap; 2101 outputStream* _out; 2102 bool _use_prev_marking; 2103 bool _all; 2104 2105 public: 2106 bool doHeapRegion(HeapRegion* hr) { 2107 HeapWord* b = hr->bottom(); 2108 HeapWord* e = hr->end(); 2109 HeapWord* t = hr->top(); 2110 HeapWord* p = NULL; 2111 if (_use_prev_marking) { 2112 p = hr->prev_top_at_mark_start(); 2113 } else { 2114 p = hr->next_top_at_mark_start(); 2115 } 2116 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2117 "TAMS: "PTR_FORMAT, b, e, t, p); 2118 _out->cr(); 2119 2120 HeapWord* from = b; 2121 HeapWord* to = t; 2122 2123 if (to > from) { 2124 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2125 _out->cr(); 2126 PrintReachableObjectClosure ocl(_bitmap, _out, 2127 _use_prev_marking, _all, hr); 2128 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2129 _out->cr(); 2130 } 2131 2132 return false; 2133 } 2134 2135 PrintReachableRegionClosure(CMBitMapRO* bitmap, 2136 outputStream* out, 2137 bool use_prev_marking, 2138 bool all) : 2139 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { } 2140 }; 2141 2142 void ConcurrentMark::print_reachable(const char* str, 2143 bool use_prev_marking, 2144 bool all) { 2145 gclog_or_tty->cr(); 2146 gclog_or_tty->print_cr("== Doing heap dump... "); 2147 2148 if (G1PrintReachableBaseFile == NULL) { 2149 gclog_or_tty->print_cr(" #### error: no base file defined"); 2150 return; 2151 } 2152 2153 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2154 (JVM_MAXPATHLEN - 1)) { 2155 gclog_or_tty->print_cr(" #### error: file name too long"); 2156 return; 2157 } 2158 2159 char file_name[JVM_MAXPATHLEN]; 2160 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2161 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2162 2163 fileStream fout(file_name); 2164 if (!fout.is_open()) { 2165 gclog_or_tty->print_cr(" #### error: could not open file"); 2166 return; 2167 } 2168 2169 outputStream* out = &fout; 2170 2171 CMBitMapRO* bitmap = NULL; 2172 if (use_prev_marking) { 2173 bitmap = _prevMarkBitMap; 2174 } else { 2175 bitmap = _nextMarkBitMap; 2176 } 2177 2178 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS"); 2179 out->cr(); 2180 2181 out->print_cr("--- ITERATING OVER REGIONS"); 2182 out->cr(); 2183 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all); 2184 _g1h->heap_region_iterate(&rcl); 2185 out->cr(); 2186 2187 gclog_or_tty->print_cr(" done"); 2188 gclog_or_tty->flush(); 2189 } 2190 2191 #endif // PRODUCT 2192 2193 // This note is for drainAllSATBBuffers and the code in between. 2194 // In the future we could reuse a task to do this work during an 2195 // evacuation pause (since now tasks are not active and can be claimed 2196 // during an evacuation pause). This was a late change to the code and 2197 // is currently not being taken advantage of. 2198 2199 class CMGlobalObjectClosure : public ObjectClosure { 2200 private: 2201 ConcurrentMark* _cm; 2202 2203 public: 2204 void do_object(oop obj) { 2205 _cm->deal_with_reference(obj); 2206 } 2207 2208 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } 2209 }; 2210 2211 void ConcurrentMark::deal_with_reference(oop obj) { 2212 if (verbose_high()) 2213 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, 2214 (void*) obj); 2215 2216 2217 HeapWord* objAddr = (HeapWord*) obj; 2218 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error"); 2219 if (_g1h->is_in_g1_reserved(objAddr)) { 2220 assert(obj != NULL, "is_in_g1_reserved should ensure this"); 2221 HeapRegion* hr = _g1h->heap_region_containing(obj); 2222 if (_g1h->is_obj_ill(obj, hr)) { 2223 if (verbose_high()) 2224 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " 2225 "marked", (void*) obj); 2226 2227 // we need to mark it first 2228 if (_nextMarkBitMap->parMark(objAddr)) { 2229 // No OrderAccess:store_load() is needed. It is implicit in the 2230 // CAS done in parMark(objAddr) above 2231 HeapWord* finger = _finger; 2232 if (objAddr < finger) { 2233 if (verbose_high()) 2234 gclog_or_tty->print_cr("[global] below the global finger " 2235 "("PTR_FORMAT"), pushing it", finger); 2236 if (!mark_stack_push(obj)) { 2237 if (verbose_low()) 2238 gclog_or_tty->print_cr("[global] global stack overflow during " 2239 "deal_with_reference"); 2240 } 2241 } 2242 } 2243 } 2244 } 2245 } 2246 2247 void ConcurrentMark::drainAllSATBBuffers() { 2248 CMGlobalObjectClosure oc(this); 2249 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2250 satb_mq_set.set_closure(&oc); 2251 2252 while (satb_mq_set.apply_closure_to_completed_buffer()) { 2253 if (verbose_medium()) 2254 gclog_or_tty->print_cr("[global] processed an SATB buffer"); 2255 } 2256 2257 // no need to check whether we should do this, as this is only 2258 // called during an evacuation pause 2259 satb_mq_set.iterate_closure_all_threads(); 2260 2261 satb_mq_set.set_closure(NULL); 2262 assert(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2263 } 2264 2265 void ConcurrentMark::markPrev(oop p) { 2266 // Note we are overriding the read-only view of the prev map here, via 2267 // the cast. 2268 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); 2269 } 2270 2271 void ConcurrentMark::clear(oop p) { 2272 assert(p != NULL && p->is_oop(), "expected an oop"); 2273 HeapWord* addr = (HeapWord*)p; 2274 assert(addr >= _nextMarkBitMap->startWord() || 2275 addr < _nextMarkBitMap->endWord(), "in a region"); 2276 2277 _nextMarkBitMap->clear(addr); 2278 } 2279 2280 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) { 2281 // Note we are overriding the read-only view of the prev map here, via 2282 // the cast. 2283 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2284 _nextMarkBitMap->clearRange(mr); 2285 } 2286 2287 HeapRegion* 2288 ConcurrentMark::claim_region(int task_num) { 2289 // "checkpoint" the finger 2290 HeapWord* finger = _finger; 2291 2292 // _heap_end will not change underneath our feet; it only changes at 2293 // yield points. 2294 while (finger < _heap_end) { 2295 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2296 2297 // is the gap between reading the finger and doing the CAS too long? 2298 2299 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 2300 HeapWord* bottom = curr_region->bottom(); 2301 HeapWord* end = curr_region->end(); 2302 HeapWord* limit = curr_region->next_top_at_mark_start(); 2303 2304 if (verbose_low()) 2305 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " 2306 "["PTR_FORMAT", "PTR_FORMAT"), " 2307 "limit = "PTR_FORMAT, 2308 task_num, curr_region, bottom, end, limit); 2309 2310 HeapWord* res = 2311 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2312 if (res == finger) { 2313 // we succeeded 2314 2315 // notice that _finger == end cannot be guaranteed here since, 2316 // someone else might have moved the finger even further 2317 assert(_finger >= end, "the finger should have moved forward"); 2318 2319 if (verbose_low()) 2320 gclog_or_tty->print_cr("[%d] we were successful with region = " 2321 PTR_FORMAT, task_num, curr_region); 2322 2323 if (limit > bottom) { 2324 if (verbose_low()) 2325 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " 2326 "returning it ", task_num, curr_region); 2327 return curr_region; 2328 } else { 2329 assert(limit == bottom, 2330 "the region limit should be at bottom"); 2331 if (verbose_low()) 2332 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " 2333 "returning NULL", task_num, curr_region); 2334 // we return NULL and the caller should try calling 2335 // claim_region() again. 2336 return NULL; 2337 } 2338 } else { 2339 assert(_finger > finger, "the finger should have moved forward"); 2340 if (verbose_low()) 2341 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2342 "global finger = "PTR_FORMAT", " 2343 "our finger = "PTR_FORMAT, 2344 task_num, _finger, finger); 2345 2346 // read it again 2347 finger = _finger; 2348 } 2349 } 2350 2351 return NULL; 2352 } 2353 2354 bool ConcurrentMark::invalidate_aborted_regions_in_cset() { 2355 bool result = false; 2356 for (int i = 0; i < (int)_max_task_num; ++i) { 2357 CMTask* the_task = _tasks[i]; 2358 MemRegion mr = the_task->aborted_region(); 2359 if (mr.start() != NULL) { 2360 assert(mr.end() != NULL, "invariant"); 2361 assert(mr.word_size() > 0, "invariant"); 2362 HeapRegion* hr = _g1h->heap_region_containing(mr.start()); 2363 assert(hr != NULL, "invariant"); 2364 if (hr->in_collection_set()) { 2365 // The region points into the collection set 2366 the_task->set_aborted_region(MemRegion()); 2367 result = true; 2368 } 2369 } 2370 } 2371 return result; 2372 } 2373 2374 bool ConcurrentMark::has_aborted_regions() { 2375 for (int i = 0; i < (int)_max_task_num; ++i) { 2376 CMTask* the_task = _tasks[i]; 2377 MemRegion mr = the_task->aborted_region(); 2378 if (mr.start() != NULL) { 2379 assert(mr.end() != NULL, "invariant"); 2380 assert(mr.word_size() > 0, "invariant"); 2381 return true; 2382 } 2383 } 2384 return false; 2385 } 2386 2387 void ConcurrentMark::oops_do(OopClosure* cl) { 2388 if (_markStack.size() > 0 && verbose_low()) 2389 gclog_or_tty->print_cr("[global] scanning the global marking stack, " 2390 "size = %d", _markStack.size()); 2391 // we first iterate over the contents of the mark stack... 2392 _markStack.oops_do(cl); 2393 2394 for (int i = 0; i < (int)_max_task_num; ++i) { 2395 OopTaskQueue* queue = _task_queues->queue((int)i); 2396 2397 if (queue->size() > 0 && verbose_low()) 2398 gclog_or_tty->print_cr("[global] scanning task queue of task %d, " 2399 "size = %d", i, queue->size()); 2400 2401 // ...then over the contents of the all the task queues. 2402 queue->oops_do(cl); 2403 } 2404 2405 // Invalidate any entries, that are in the region stack, that 2406 // point into the collection set 2407 if (_regionStack.invalidate_entries_into_cset()) { 2408 // otherwise, any gray objects copied during the evacuation pause 2409 // might not be visited. 2410 assert(_should_gray_objects, "invariant"); 2411 } 2412 2413 // Invalidate any aborted regions, recorded in the individual CM 2414 // tasks, that point into the collection set. 2415 if (invalidate_aborted_regions_in_cset()) { 2416 // otherwise, any gray objects copied during the evacuation pause 2417 // might not be visited. 2418 assert(_should_gray_objects, "invariant"); 2419 } 2420 2421 } 2422 2423 void ConcurrentMark::clear_marking_state() { 2424 _markStack.setEmpty(); 2425 _markStack.clear_overflow(); 2426 _regionStack.setEmpty(); 2427 _regionStack.clear_overflow(); 2428 clear_has_overflown(); 2429 _finger = _heap_start; 2430 2431 for (int i = 0; i < (int)_max_task_num; ++i) { 2432 OopTaskQueue* queue = _task_queues->queue(i); 2433 queue->set_empty(); 2434 } 2435 } 2436 2437 void ConcurrentMark::print_stats() { 2438 if (verbose_stats()) { 2439 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2440 for (size_t i = 0; i < _active_tasks; ++i) { 2441 _tasks[i]->print_stats(); 2442 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2443 } 2444 } 2445 } 2446 2447 class CSMarkOopClosure: public OopClosure { 2448 friend class CSMarkBitMapClosure; 2449 2450 G1CollectedHeap* _g1h; 2451 CMBitMap* _bm; 2452 ConcurrentMark* _cm; 2453 oop* _ms; 2454 jint* _array_ind_stack; 2455 int _ms_size; 2456 int _ms_ind; 2457 int _array_increment; 2458 2459 bool push(oop obj, int arr_ind = 0) { 2460 if (_ms_ind == _ms_size) { 2461 gclog_or_tty->print_cr("Mark stack is full."); 2462 return false; 2463 } 2464 _ms[_ms_ind] = obj; 2465 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind; 2466 _ms_ind++; 2467 return true; 2468 } 2469 2470 oop pop() { 2471 if (_ms_ind == 0) return NULL; 2472 else { 2473 _ms_ind--; 2474 return _ms[_ms_ind]; 2475 } 2476 } 2477 2478 template <class T> bool drain() { 2479 while (_ms_ind > 0) { 2480 oop obj = pop(); 2481 assert(obj != NULL, "Since index was non-zero."); 2482 if (obj->is_objArray()) { 2483 jint arr_ind = _array_ind_stack[_ms_ind]; 2484 objArrayOop aobj = objArrayOop(obj); 2485 jint len = aobj->length(); 2486 jint next_arr_ind = arr_ind + _array_increment; 2487 if (next_arr_ind < len) { 2488 push(obj, next_arr_ind); 2489 } 2490 // Now process this portion of this one. 2491 int lim = MIN2(next_arr_ind, len); 2492 for (int j = arr_ind; j < lim; j++) { 2493 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j)); 2494 } 2495 2496 } else { 2497 obj->oop_iterate(this); 2498 } 2499 if (abort()) return false; 2500 } 2501 return true; 2502 } 2503 2504 public: 2505 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : 2506 _g1h(G1CollectedHeap::heap()), 2507 _cm(cm), 2508 _bm(cm->nextMarkBitMap()), 2509 _ms_size(ms_size), _ms_ind(0), 2510 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), 2511 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), 2512 _array_increment(MAX2(ms_size/8, 16)) 2513 {} 2514 2515 ~CSMarkOopClosure() { 2516 FREE_C_HEAP_ARRAY(oop, _ms); 2517 FREE_C_HEAP_ARRAY(jint, _array_ind_stack); 2518 } 2519 2520 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2521 virtual void do_oop( oop* p) { do_oop_work(p); } 2522 2523 template <class T> void do_oop_work(T* p) { 2524 T heap_oop = oopDesc::load_heap_oop(p); 2525 if (oopDesc::is_null(heap_oop)) return; 2526 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 2527 if (obj->is_forwarded()) { 2528 // If the object has already been forwarded, we have to make sure 2529 // that it's marked. So follow the forwarding pointer. Note that 2530 // this does the right thing for self-forwarding pointers in the 2531 // evacuation failure case. 2532 obj = obj->forwardee(); 2533 } 2534 HeapRegion* hr = _g1h->heap_region_containing(obj); 2535 if (hr != NULL) { 2536 if (hr->in_collection_set()) { 2537 if (_g1h->is_obj_ill(obj)) { 2538 _bm->mark((HeapWord*)obj); 2539 if (!push(obj)) { 2540 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); 2541 set_abort(); 2542 } 2543 } 2544 } else { 2545 // Outside the collection set; we need to gray it 2546 _cm->deal_with_reference(obj); 2547 } 2548 } 2549 } 2550 }; 2551 2552 class CSMarkBitMapClosure: public BitMapClosure { 2553 G1CollectedHeap* _g1h; 2554 CMBitMap* _bitMap; 2555 ConcurrentMark* _cm; 2556 CSMarkOopClosure _oop_cl; 2557 public: 2558 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : 2559 _g1h(G1CollectedHeap::heap()), 2560 _bitMap(cm->nextMarkBitMap()), 2561 _oop_cl(cm, ms_size) 2562 {} 2563 2564 ~CSMarkBitMapClosure() {} 2565 2566 bool do_bit(size_t offset) { 2567 // convert offset into a HeapWord* 2568 HeapWord* addr = _bitMap->offsetToHeapWord(offset); 2569 assert(_bitMap->endWord() && addr < _bitMap->endWord(), 2570 "address out of range"); 2571 assert(_bitMap->isMarked(addr), "tautology"); 2572 oop obj = oop(addr); 2573 if (!obj->is_forwarded()) { 2574 if (!_oop_cl.push(obj)) return false; 2575 if (UseCompressedOops) { 2576 if (!_oop_cl.drain<narrowOop>()) return false; 2577 } else { 2578 if (!_oop_cl.drain<oop>()) return false; 2579 } 2580 } 2581 // Otherwise... 2582 return true; 2583 } 2584 }; 2585 2586 2587 class CompleteMarkingInCSHRClosure: public HeapRegionClosure { 2588 CMBitMap* _bm; 2589 CSMarkBitMapClosure _bit_cl; 2590 enum SomePrivateConstants { 2591 MSSize = 1000 2592 }; 2593 bool _completed; 2594 public: 2595 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : 2596 _bm(cm->nextMarkBitMap()), 2597 _bit_cl(cm, MSSize), 2598 _completed(true) 2599 {} 2600 2601 ~CompleteMarkingInCSHRClosure() {} 2602 2603 bool doHeapRegion(HeapRegion* r) { 2604 if (!r->evacuation_failed()) { 2605 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); 2606 if (!mr.is_empty()) { 2607 if (!_bm->iterate(&_bit_cl, mr)) { 2608 _completed = false; 2609 return true; 2610 } 2611 } 2612 } 2613 return false; 2614 } 2615 2616 bool completed() { return _completed; } 2617 }; 2618 2619 class ClearMarksInHRClosure: public HeapRegionClosure { 2620 CMBitMap* _bm; 2621 public: 2622 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } 2623 2624 bool doHeapRegion(HeapRegion* r) { 2625 if (!r->used_region().is_empty() && !r->evacuation_failed()) { 2626 MemRegion usedMR = r->used_region(); 2627 _bm->clearRange(r->used_region()); 2628 } 2629 return false; 2630 } 2631 }; 2632 2633 void ConcurrentMark::complete_marking_in_collection_set() { 2634 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2635 2636 if (!g1h->mark_in_progress()) { 2637 g1h->g1_policy()->record_mark_closure_time(0.0); 2638 return; 2639 } 2640 2641 int i = 1; 2642 double start = os::elapsedTime(); 2643 while (true) { 2644 i++; 2645 CompleteMarkingInCSHRClosure cmplt(this); 2646 g1h->collection_set_iterate(&cmplt); 2647 if (cmplt.completed()) break; 2648 } 2649 double end_time = os::elapsedTime(); 2650 double elapsed_time_ms = (end_time - start) * 1000.0; 2651 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms); 2652 2653 ClearMarksInHRClosure clr(nextMarkBitMap()); 2654 g1h->collection_set_iterate(&clr); 2655 } 2656 2657 // The next two methods deal with the following optimisation. Some 2658 // objects are gray by being marked and located above the finger. If 2659 // they are copied, during an evacuation pause, below the finger then 2660 // the need to be pushed on the stack. The observation is that, if 2661 // there are no regions in the collection set located above the 2662 // finger, then the above cannot happen, hence we do not need to 2663 // explicitly gray any objects when copying them to below the 2664 // finger. The global stack will be scanned to ensure that, if it 2665 // points to objects being copied, it will update their 2666 // location. There is a tricky situation with the gray objects in 2667 // region stack that are being coped, however. See the comment in 2668 // newCSet(). 2669 2670 void ConcurrentMark::newCSet() { 2671 if (!concurrent_marking_in_progress()) 2672 // nothing to do if marking is not in progress 2673 return; 2674 2675 // find what the lowest finger is among the global and local fingers 2676 _min_finger = _finger; 2677 for (int i = 0; i < (int)_max_task_num; ++i) { 2678 CMTask* task = _tasks[i]; 2679 HeapWord* task_finger = task->finger(); 2680 if (task_finger != NULL && task_finger < _min_finger) 2681 _min_finger = task_finger; 2682 } 2683 2684 _should_gray_objects = false; 2685 2686 // This fixes a very subtle and fustrating bug. It might be the case 2687 // that, during en evacuation pause, heap regions that contain 2688 // objects that are gray (by being in regions contained in the 2689 // region stack) are included in the collection set. Since such gray 2690 // objects will be moved, and because it's not easy to redirect 2691 // region stack entries to point to a new location (because objects 2692 // in one region might be scattered to multiple regions after they 2693 // are copied), one option is to ensure that all marked objects 2694 // copied during a pause are pushed on the stack. Notice, however, 2695 // that this problem can only happen when the region stack is not 2696 // empty during an evacuation pause. So, we make the fix a bit less 2697 // conservative and ensure that regions are pushed on the stack, 2698 // irrespective whether all collection set regions are below the 2699 // finger, if the region stack is not empty. This is expected to be 2700 // a rare case, so I don't think it's necessary to be smarted about it. 2701 if (!region_stack_empty() || has_aborted_regions()) 2702 _should_gray_objects = true; 2703 } 2704 2705 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) { 2706 if (!concurrent_marking_in_progress()) 2707 return; 2708 2709 HeapWord* region_end = hr->end(); 2710 if (region_end > _min_finger) 2711 _should_gray_objects = true; 2712 } 2713 2714 // abandon current marking iteration due to a Full GC 2715 void ConcurrentMark::abort() { 2716 // Clear all marks to force marking thread to do nothing 2717 _nextMarkBitMap->clearAll(); 2718 // Empty mark stack 2719 clear_marking_state(); 2720 for (int i = 0; i < (int)_max_task_num; ++i) { 2721 _tasks[i]->clear_region_fields(); 2722 _tasks[i]->clear_aborted_region(); 2723 } 2724 _has_aborted = true; 2725 2726 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2727 satb_mq_set.abandon_partial_marking(); 2728 // This can be called either during or outside marking, we'll read 2729 // the expected_active value from the SATB queue set. 2730 satb_mq_set.set_active_all_threads( 2731 false, /* new active value */ 2732 satb_mq_set.is_active() /* expected_active */); 2733 } 2734 2735 static void print_ms_time_info(const char* prefix, const char* name, 2736 NumberSeq& ns) { 2737 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2738 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2739 if (ns.num() > 0) { 2740 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2741 prefix, ns.sd(), ns.maximum()); 2742 } 2743 } 2744 2745 void ConcurrentMark::print_summary_info() { 2746 gclog_or_tty->print_cr(" Concurrent marking:"); 2747 print_ms_time_info(" ", "init marks", _init_times); 2748 print_ms_time_info(" ", "remarks", _remark_times); 2749 { 2750 print_ms_time_info(" ", "final marks", _remark_mark_times); 2751 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2752 2753 } 2754 print_ms_time_info(" ", "cleanups", _cleanup_times); 2755 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2756 _total_counting_time, 2757 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 2758 (double)_cleanup_times.num() 2759 : 0.0)); 2760 if (G1ScrubRemSets) { 2761 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2762 _total_rs_scrub_time, 2763 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 2764 (double)_cleanup_times.num() 2765 : 0.0)); 2766 } 2767 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 2768 (_init_times.sum() + _remark_times.sum() + 2769 _cleanup_times.sum())/1000.0); 2770 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 2771 "(%8.2f s marking, %8.2f s counting).", 2772 cmThread()->vtime_accum(), 2773 cmThread()->vtime_mark_accum(), 2774 cmThread()->vtime_count_accum()); 2775 } 2776 2777 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2778 _parallel_workers->print_worker_threads_on(st); 2779 } 2780 2781 // Closures 2782 // XXX: there seems to be a lot of code duplication here; 2783 // should refactor and consolidate the shared code. 2784 2785 // This closure is used to mark refs into the CMS generation in 2786 // the CMS bit map. Called at the first checkpoint. 2787 2788 // We take a break if someone is trying to stop the world. 2789 bool ConcurrentMark::do_yield_check(int worker_i) { 2790 if (should_yield()) { 2791 if (worker_i == 0) 2792 _g1h->g1_policy()->record_concurrent_pause(); 2793 cmThread()->yield(); 2794 if (worker_i == 0) 2795 _g1h->g1_policy()->record_concurrent_pause_end(); 2796 return true; 2797 } else { 2798 return false; 2799 } 2800 } 2801 2802 bool ConcurrentMark::should_yield() { 2803 return cmThread()->should_yield(); 2804 } 2805 2806 bool ConcurrentMark::containing_card_is_marked(void* p) { 2807 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 2808 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 2809 } 2810 2811 bool ConcurrentMark::containing_cards_are_marked(void* start, 2812 void* last) { 2813 return 2814 containing_card_is_marked(start) && 2815 containing_card_is_marked(last); 2816 } 2817 2818 #ifndef PRODUCT 2819 // for debugging purposes 2820 void ConcurrentMark::print_finger() { 2821 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 2822 _heap_start, _heap_end, _finger); 2823 for (int i = 0; i < (int) _max_task_num; ++i) { 2824 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 2825 } 2826 gclog_or_tty->print_cr(""); 2827 } 2828 #endif 2829 2830 // Closure for iteration over bitmaps 2831 class CMBitMapClosure : public BitMapClosure { 2832 private: 2833 // the bitmap that is being iterated over 2834 CMBitMap* _nextMarkBitMap; 2835 ConcurrentMark* _cm; 2836 CMTask* _task; 2837 // true if we're scanning a heap region claimed by the task (so that 2838 // we move the finger along), false if we're not, i.e. currently when 2839 // scanning a heap region popped from the region stack (so that we 2840 // do not move the task finger along; it'd be a mistake if we did so). 2841 bool _scanning_heap_region; 2842 2843 public: 2844 CMBitMapClosure(CMTask *task, 2845 ConcurrentMark* cm, 2846 CMBitMap* nextMarkBitMap) 2847 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2848 2849 void set_scanning_heap_region(bool scanning_heap_region) { 2850 _scanning_heap_region = scanning_heap_region; 2851 } 2852 2853 bool do_bit(size_t offset) { 2854 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2855 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2856 assert( addr < _cm->finger(), "invariant"); 2857 2858 if (_scanning_heap_region) { 2859 statsOnly( _task->increase_objs_found_on_bitmap() ); 2860 assert(addr >= _task->finger(), "invariant"); 2861 // We move that task's local finger along. 2862 _task->move_finger_to(addr); 2863 } else { 2864 // We move the task's region finger along. 2865 _task->move_region_finger_to(addr); 2866 } 2867 2868 _task->scan_object(oop(addr)); 2869 // we only partially drain the local queue and global stack 2870 _task->drain_local_queue(true); 2871 _task->drain_global_stack(true); 2872 2873 // if the has_aborted flag has been raised, we need to bail out of 2874 // the iteration 2875 return !_task->has_aborted(); 2876 } 2877 }; 2878 2879 // Closure for iterating over objects, currently only used for 2880 // processing SATB buffers. 2881 class CMObjectClosure : public ObjectClosure { 2882 private: 2883 CMTask* _task; 2884 2885 public: 2886 void do_object(oop obj) { 2887 _task->deal_with_reference(obj); 2888 } 2889 2890 CMObjectClosure(CMTask* task) : _task(task) { } 2891 }; 2892 2893 // Closure for iterating over object fields 2894 class CMOopClosure : public OopClosure { 2895 private: 2896 G1CollectedHeap* _g1h; 2897 ConcurrentMark* _cm; 2898 CMTask* _task; 2899 2900 public: 2901 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2902 virtual void do_oop( oop* p) { do_oop_work(p); } 2903 2904 template <class T> void do_oop_work(T* p) { 2905 assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant"); 2906 assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), 2907 "invariant"); 2908 2909 oop obj = oopDesc::load_decode_heap_oop(p); 2910 if (_cm->verbose_high()) 2911 gclog_or_tty->print_cr("[%d] we're looking at location " 2912 "*"PTR_FORMAT" = "PTR_FORMAT, 2913 _task->task_id(), p, (void*) obj); 2914 _task->deal_with_reference(obj); 2915 } 2916 2917 CMOopClosure(G1CollectedHeap* g1h, 2918 ConcurrentMark* cm, 2919 CMTask* task) 2920 : _g1h(g1h), _cm(cm), _task(task) { } 2921 }; 2922 2923 void CMTask::setup_for_region(HeapRegion* hr) { 2924 // Separated the asserts so that we know which one fires. 2925 assert(hr != NULL, 2926 "claim_region() should have filtered out continues humongous regions"); 2927 assert(!hr->continuesHumongous(), 2928 "claim_region() should have filtered out continues humongous regions"); 2929 2930 if (_cm->verbose_low()) 2931 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 2932 _task_id, hr); 2933 2934 _curr_region = hr; 2935 _finger = hr->bottom(); 2936 update_region_limit(); 2937 } 2938 2939 void CMTask::update_region_limit() { 2940 HeapRegion* hr = _curr_region; 2941 HeapWord* bottom = hr->bottom(); 2942 HeapWord* limit = hr->next_top_at_mark_start(); 2943 2944 if (limit == bottom) { 2945 if (_cm->verbose_low()) 2946 gclog_or_tty->print_cr("[%d] found an empty region " 2947 "["PTR_FORMAT", "PTR_FORMAT")", 2948 _task_id, bottom, limit); 2949 // The region was collected underneath our feet. 2950 // We set the finger to bottom to ensure that the bitmap 2951 // iteration that will follow this will not do anything. 2952 // (this is not a condition that holds when we set the region up, 2953 // as the region is not supposed to be empty in the first place) 2954 _finger = bottom; 2955 } else if (limit >= _region_limit) { 2956 assert(limit >= _finger, "peace of mind"); 2957 } else { 2958 assert(limit < _region_limit, "only way to get here"); 2959 // This can happen under some pretty unusual circumstances. An 2960 // evacuation pause empties the region underneath our feet (NTAMS 2961 // at bottom). We then do some allocation in the region (NTAMS 2962 // stays at bottom), followed by the region being used as a GC 2963 // alloc region (NTAMS will move to top() and the objects 2964 // originally below it will be grayed). All objects now marked in 2965 // the region are explicitly grayed, if below the global finger, 2966 // and we do not need in fact to scan anything else. So, we simply 2967 // set _finger to be limit to ensure that the bitmap iteration 2968 // doesn't do anything. 2969 _finger = limit; 2970 } 2971 2972 _region_limit = limit; 2973 } 2974 2975 void CMTask::giveup_current_region() { 2976 assert(_curr_region != NULL, "invariant"); 2977 if (_cm->verbose_low()) 2978 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 2979 _task_id, _curr_region); 2980 clear_region_fields(); 2981 } 2982 2983 void CMTask::clear_region_fields() { 2984 // Values for these three fields that indicate that we're not 2985 // holding on to a region. 2986 _curr_region = NULL; 2987 _finger = NULL; 2988 _region_limit = NULL; 2989 2990 _region_finger = NULL; 2991 } 2992 2993 void CMTask::reset(CMBitMap* nextMarkBitMap) { 2994 guarantee(nextMarkBitMap != NULL, "invariant"); 2995 2996 if (_cm->verbose_low()) 2997 gclog_or_tty->print_cr("[%d] resetting", _task_id); 2998 2999 _nextMarkBitMap = nextMarkBitMap; 3000 clear_region_fields(); 3001 clear_aborted_region(); 3002 3003 _calls = 0; 3004 _elapsed_time_ms = 0.0; 3005 _termination_time_ms = 0.0; 3006 _termination_start_time_ms = 0.0; 3007 3008 #if _MARKING_STATS_ 3009 _local_pushes = 0; 3010 _local_pops = 0; 3011 _local_max_size = 0; 3012 _objs_scanned = 0; 3013 _global_pushes = 0; 3014 _global_pops = 0; 3015 _global_max_size = 0; 3016 _global_transfers_to = 0; 3017 _global_transfers_from = 0; 3018 _region_stack_pops = 0; 3019 _regions_claimed = 0; 3020 _objs_found_on_bitmap = 0; 3021 _satb_buffers_processed = 0; 3022 _steal_attempts = 0; 3023 _steals = 0; 3024 _aborted = 0; 3025 _aborted_overflow = 0; 3026 _aborted_cm_aborted = 0; 3027 _aborted_yield = 0; 3028 _aborted_timed_out = 0; 3029 _aborted_satb = 0; 3030 _aborted_termination = 0; 3031 #endif // _MARKING_STATS_ 3032 } 3033 3034 bool CMTask::should_exit_termination() { 3035 regular_clock_call(); 3036 // This is called when we are in the termination protocol. We should 3037 // quit if, for some reason, this task wants to abort or the global 3038 // stack is not empty (this means that we can get work from it). 3039 return !_cm->mark_stack_empty() || has_aborted(); 3040 } 3041 3042 // This determines whether the method below will check both the local 3043 // and global fingers when determining whether to push on the stack a 3044 // gray object (value 1) or whether it will only check the global one 3045 // (value 0). The tradeoffs are that the former will be a bit more 3046 // accurate and possibly push less on the stack, but it might also be 3047 // a little bit slower. 3048 3049 #define _CHECK_BOTH_FINGERS_ 1 3050 3051 void CMTask::deal_with_reference(oop obj) { 3052 if (_cm->verbose_high()) 3053 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT, 3054 _task_id, (void*) obj); 3055 3056 ++_refs_reached; 3057 3058 HeapWord* objAddr = (HeapWord*) obj; 3059 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error"); 3060 if (_g1h->is_in_g1_reserved(objAddr)) { 3061 assert(obj != NULL, "is_in_g1_reserved should ensure this"); 3062 HeapRegion* hr = _g1h->heap_region_containing(obj); 3063 if (_g1h->is_obj_ill(obj, hr)) { 3064 if (_cm->verbose_high()) 3065 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked", 3066 _task_id, (void*) obj); 3067 3068 // we need to mark it first 3069 if (_nextMarkBitMap->parMark(objAddr)) { 3070 // No OrderAccess:store_load() is needed. It is implicit in the 3071 // CAS done in parMark(objAddr) above 3072 HeapWord* global_finger = _cm->finger(); 3073 3074 #if _CHECK_BOTH_FINGERS_ 3075 // we will check both the local and global fingers 3076 3077 if (_finger != NULL && objAddr < _finger) { 3078 if (_cm->verbose_high()) 3079 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), " 3080 "pushing it", _task_id, _finger); 3081 push(obj); 3082 } else if (_curr_region != NULL && objAddr < _region_limit) { 3083 // do nothing 3084 } else if (objAddr < global_finger) { 3085 // Notice that the global finger might be moving forward 3086 // concurrently. This is not a problem. In the worst case, we 3087 // mark the object while it is above the global finger and, by 3088 // the time we read the global finger, it has moved forward 3089 // passed this object. In this case, the object will probably 3090 // be visited when a task is scanning the region and will also 3091 // be pushed on the stack. So, some duplicate work, but no 3092 // correctness problems. 3093 3094 if (_cm->verbose_high()) 3095 gclog_or_tty->print_cr("[%d] below the global finger " 3096 "("PTR_FORMAT"), pushing it", 3097 _task_id, global_finger); 3098 push(obj); 3099 } else { 3100 // do nothing 3101 } 3102 #else // _CHECK_BOTH_FINGERS_ 3103 // we will only check the global finger 3104 3105 if (objAddr < global_finger) { 3106 // see long comment above 3107 3108 if (_cm->verbose_high()) 3109 gclog_or_tty->print_cr("[%d] below the global finger " 3110 "("PTR_FORMAT"), pushing it", 3111 _task_id, global_finger); 3112 push(obj); 3113 } 3114 #endif // _CHECK_BOTH_FINGERS_ 3115 } 3116 } 3117 } 3118 } 3119 3120 void CMTask::push(oop obj) { 3121 HeapWord* objAddr = (HeapWord*) obj; 3122 assert(_g1h->is_in_g1_reserved(objAddr), "invariant"); 3123 assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(), 3124 "invariant"); 3125 assert(!_g1h->is_obj_ill(obj), "invariant"); 3126 assert(_nextMarkBitMap->isMarked(objAddr), "invariant"); 3127 3128 if (_cm->verbose_high()) 3129 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj); 3130 3131 if (!_task_queue->push(obj)) { 3132 // The local task queue looks full. We need to push some entries 3133 // to the global stack. 3134 3135 if (_cm->verbose_medium()) 3136 gclog_or_tty->print_cr("[%d] task queue overflow, " 3137 "moving entries to the global stack", 3138 _task_id); 3139 move_entries_to_global_stack(); 3140 3141 // this should succeed since, even if we overflow the global 3142 // stack, we should have definitely removed some entries from the 3143 // local queue. So, there must be space on it. 3144 bool success = _task_queue->push(obj); 3145 assert(success, "invariant"); 3146 } 3147 3148 statsOnly( int tmp_size = _task_queue->size(); 3149 if (tmp_size > _local_max_size) 3150 _local_max_size = tmp_size; 3151 ++_local_pushes ); 3152 } 3153 3154 void CMTask::reached_limit() { 3155 assert(_words_scanned >= _words_scanned_limit || 3156 _refs_reached >= _refs_reached_limit , 3157 "shouldn't have been called otherwise"); 3158 regular_clock_call(); 3159 } 3160 3161 void CMTask::regular_clock_call() { 3162 if (has_aborted()) 3163 return; 3164 3165 // First, we need to recalculate the words scanned and refs reached 3166 // limits for the next clock call. 3167 recalculate_limits(); 3168 3169 // During the regular clock call we do the following 3170 3171 // (1) If an overflow has been flagged, then we abort. 3172 if (_cm->has_overflown()) { 3173 set_has_aborted(); 3174 return; 3175 } 3176 3177 // If we are not concurrent (i.e. we're doing remark) we don't need 3178 // to check anything else. The other steps are only needed during 3179 // the concurrent marking phase. 3180 if (!concurrent()) 3181 return; 3182 3183 // (2) If marking has been aborted for Full GC, then we also abort. 3184 if (_cm->has_aborted()) { 3185 set_has_aborted(); 3186 statsOnly( ++_aborted_cm_aborted ); 3187 return; 3188 } 3189 3190 double curr_time_ms = os::elapsedVTime() * 1000.0; 3191 3192 // (3) If marking stats are enabled, then we update the step history. 3193 #if _MARKING_STATS_ 3194 if (_words_scanned >= _words_scanned_limit) 3195 ++_clock_due_to_scanning; 3196 if (_refs_reached >= _refs_reached_limit) 3197 ++_clock_due_to_marking; 3198 3199 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 3200 _interval_start_time_ms = curr_time_ms; 3201 _all_clock_intervals_ms.add(last_interval_ms); 3202 3203 if (_cm->verbose_medium()) { 3204 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 3205 "scanned = %d%s, refs reached = %d%s", 3206 _task_id, last_interval_ms, 3207 _words_scanned, 3208 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 3209 _refs_reached, 3210 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 3211 } 3212 #endif // _MARKING_STATS_ 3213 3214 // (4) We check whether we should yield. If we have to, then we abort. 3215 if (_cm->should_yield()) { 3216 // We should yield. To do this we abort the task. The caller is 3217 // responsible for yielding. 3218 set_has_aborted(); 3219 statsOnly( ++_aborted_yield ); 3220 return; 3221 } 3222 3223 // (5) We check whether we've reached our time quota. If we have, 3224 // then we abort. 3225 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3226 if (elapsed_time_ms > _time_target_ms) { 3227 set_has_aborted(); 3228 _has_aborted_timed_out = true; 3229 statsOnly( ++_aborted_timed_out ); 3230 return; 3231 } 3232 3233 // (6) Finally, we check whether there are enough completed STAB 3234 // buffers available for processing. If there are, we abort. 3235 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3236 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3237 if (_cm->verbose_low()) 3238 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 3239 _task_id); 3240 // we do need to process SATB buffers, we'll abort and restart 3241 // the marking task to do so 3242 set_has_aborted(); 3243 statsOnly( ++_aborted_satb ); 3244 return; 3245 } 3246 } 3247 3248 void CMTask::recalculate_limits() { 3249 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3250 _words_scanned_limit = _real_words_scanned_limit; 3251 3252 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3253 _refs_reached_limit = _real_refs_reached_limit; 3254 } 3255 3256 void CMTask::decrease_limits() { 3257 // This is called when we believe that we're going to do an infrequent 3258 // operation which will increase the per byte scanned cost (i.e. move 3259 // entries to/from the global stack). It basically tries to decrease the 3260 // scanning limit so that the clock is called earlier. 3261 3262 if (_cm->verbose_medium()) 3263 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 3264 3265 _words_scanned_limit = _real_words_scanned_limit - 3266 3 * words_scanned_period / 4; 3267 _refs_reached_limit = _real_refs_reached_limit - 3268 3 * refs_reached_period / 4; 3269 } 3270 3271 void CMTask::move_entries_to_global_stack() { 3272 // local array where we'll store the entries that will be popped 3273 // from the local queue 3274 oop buffer[global_stack_transfer_size]; 3275 3276 int n = 0; 3277 oop obj; 3278 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3279 buffer[n] = obj; 3280 ++n; 3281 } 3282 3283 if (n > 0) { 3284 // we popped at least one entry from the local queue 3285 3286 statsOnly( ++_global_transfers_to; _local_pops += n ); 3287 3288 if (!_cm->mark_stack_push(buffer, n)) { 3289 if (_cm->verbose_low()) 3290 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id); 3291 set_has_aborted(); 3292 } else { 3293 // the transfer was successful 3294 3295 if (_cm->verbose_medium()) 3296 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 3297 _task_id, n); 3298 statsOnly( int tmp_size = _cm->mark_stack_size(); 3299 if (tmp_size > _global_max_size) 3300 _global_max_size = tmp_size; 3301 _global_pushes += n ); 3302 } 3303 } 3304 3305 // this operation was quite expensive, so decrease the limits 3306 decrease_limits(); 3307 } 3308 3309 void CMTask::get_entries_from_global_stack() { 3310 // local array where we'll store the entries that will be popped 3311 // from the global stack. 3312 oop buffer[global_stack_transfer_size]; 3313 int n; 3314 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3315 assert(n <= global_stack_transfer_size, 3316 "we should not pop more than the given limit"); 3317 if (n > 0) { 3318 // yes, we did actually pop at least one entry 3319 3320 statsOnly( ++_global_transfers_from; _global_pops += n ); 3321 if (_cm->verbose_medium()) 3322 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 3323 _task_id, n); 3324 for (int i = 0; i < n; ++i) { 3325 bool success = _task_queue->push(buffer[i]); 3326 // We only call this when the local queue is empty or under a 3327 // given target limit. So, we do not expect this push to fail. 3328 assert(success, "invariant"); 3329 } 3330 3331 statsOnly( int tmp_size = _task_queue->size(); 3332 if (tmp_size > _local_max_size) 3333 _local_max_size = tmp_size; 3334 _local_pushes += n ); 3335 } 3336 3337 // this operation was quite expensive, so decrease the limits 3338 decrease_limits(); 3339 } 3340 3341 void CMTask::drain_local_queue(bool partially) { 3342 if (has_aborted()) 3343 return; 3344 3345 // Decide what the target size is, depending whether we're going to 3346 // drain it partially (so that other tasks can steal if they run out 3347 // of things to do) or totally (at the very end). 3348 size_t target_size; 3349 if (partially) 3350 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3351 else 3352 target_size = 0; 3353 3354 if (_task_queue->size() > target_size) { 3355 if (_cm->verbose_high()) 3356 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", 3357 _task_id, target_size); 3358 3359 oop obj; 3360 bool ret = _task_queue->pop_local(obj); 3361 while (ret) { 3362 statsOnly( ++_local_pops ); 3363 3364 if (_cm->verbose_high()) 3365 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 3366 (void*) obj); 3367 3368 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3369 assert(!_g1h->heap_region_containing(obj)->is_on_free_list(), 3370 "invariant"); 3371 3372 scan_object(obj); 3373 3374 if (_task_queue->size() <= target_size || has_aborted()) 3375 ret = false; 3376 else 3377 ret = _task_queue->pop_local(obj); 3378 } 3379 3380 if (_cm->verbose_high()) 3381 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 3382 _task_id, _task_queue->size()); 3383 } 3384 } 3385 3386 void CMTask::drain_global_stack(bool partially) { 3387 if (has_aborted()) 3388 return; 3389 3390 // We have a policy to drain the local queue before we attempt to 3391 // drain the global stack. 3392 assert(partially || _task_queue->size() == 0, "invariant"); 3393 3394 // Decide what the target size is, depending whether we're going to 3395 // drain it partially (so that other tasks can steal if they run out 3396 // of things to do) or totally (at the very end). Notice that, 3397 // because we move entries from the global stack in chunks or 3398 // because another task might be doing the same, we might in fact 3399 // drop below the target. But, this is not a problem. 3400 size_t target_size; 3401 if (partially) 3402 target_size = _cm->partial_mark_stack_size_target(); 3403 else 3404 target_size = 0; 3405 3406 if (_cm->mark_stack_size() > target_size) { 3407 if (_cm->verbose_low()) 3408 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", 3409 _task_id, target_size); 3410 3411 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3412 get_entries_from_global_stack(); 3413 drain_local_queue(partially); 3414 } 3415 3416 if (_cm->verbose_low()) 3417 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 3418 _task_id, _cm->mark_stack_size()); 3419 } 3420 } 3421 3422 // SATB Queue has several assumptions on whether to call the par or 3423 // non-par versions of the methods. this is why some of the code is 3424 // replicated. We should really get rid of the single-threaded version 3425 // of the code to simplify things. 3426 void CMTask::drain_satb_buffers() { 3427 if (has_aborted()) 3428 return; 3429 3430 // We set this so that the regular clock knows that we're in the 3431 // middle of draining buffers and doesn't set the abort flag when it 3432 // notices that SATB buffers are available for draining. It'd be 3433 // very counter productive if it did that. :-) 3434 _draining_satb_buffers = true; 3435 3436 CMObjectClosure oc(this); 3437 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3438 if (G1CollectedHeap::use_parallel_gc_threads()) 3439 satb_mq_set.set_par_closure(_task_id, &oc); 3440 else 3441 satb_mq_set.set_closure(&oc); 3442 3443 // This keeps claiming and applying the closure to completed buffers 3444 // until we run out of buffers or we need to abort. 3445 if (G1CollectedHeap::use_parallel_gc_threads()) { 3446 while (!has_aborted() && 3447 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3448 if (_cm->verbose_medium()) 3449 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3450 statsOnly( ++_satb_buffers_processed ); 3451 regular_clock_call(); 3452 } 3453 } else { 3454 while (!has_aborted() && 3455 satb_mq_set.apply_closure_to_completed_buffer()) { 3456 if (_cm->verbose_medium()) 3457 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3458 statsOnly( ++_satb_buffers_processed ); 3459 regular_clock_call(); 3460 } 3461 } 3462 3463 if (!concurrent() && !has_aborted()) { 3464 // We should only do this during remark. 3465 if (G1CollectedHeap::use_parallel_gc_threads()) 3466 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3467 else 3468 satb_mq_set.iterate_closure_all_threads(); 3469 } 3470 3471 _draining_satb_buffers = false; 3472 3473 assert(has_aborted() || 3474 concurrent() || 3475 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3476 3477 if (G1CollectedHeap::use_parallel_gc_threads()) 3478 satb_mq_set.set_par_closure(_task_id, NULL); 3479 else 3480 satb_mq_set.set_closure(NULL); 3481 3482 // again, this was a potentially expensive operation, decrease the 3483 // limits to get the regular clock call early 3484 decrease_limits(); 3485 } 3486 3487 void CMTask::drain_region_stack(BitMapClosure* bc) { 3488 if (has_aborted()) 3489 return; 3490 3491 assert(_region_finger == NULL, 3492 "it should be NULL when we're not scanning a region"); 3493 3494 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) { 3495 if (_cm->verbose_low()) 3496 gclog_or_tty->print_cr("[%d] draining region stack, size = %d", 3497 _task_id, _cm->region_stack_size()); 3498 3499 MemRegion mr; 3500 3501 if (!_aborted_region.is_empty()) { 3502 mr = _aborted_region; 3503 _aborted_region = MemRegion(); 3504 3505 if (_cm->verbose_low()) 3506 gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )", 3507 _task_id, mr.start(), mr.end()); 3508 } else { 3509 mr = _cm->region_stack_pop_lock_free(); 3510 // it returns MemRegion() if the pop fails 3511 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 3512 } 3513 3514 while (mr.start() != NULL) { 3515 if (_cm->verbose_medium()) 3516 gclog_or_tty->print_cr("[%d] we are scanning region " 3517 "["PTR_FORMAT", "PTR_FORMAT")", 3518 _task_id, mr.start(), mr.end()); 3519 3520 assert(mr.end() <= _cm->finger(), 3521 "otherwise the region shouldn't be on the stack"); 3522 assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); 3523 if (_nextMarkBitMap->iterate(bc, mr)) { 3524 assert(!has_aborted(), 3525 "cannot abort the task without aborting the bitmap iteration"); 3526 3527 // We finished iterating over the region without aborting. 3528 regular_clock_call(); 3529 if (has_aborted()) 3530 mr = MemRegion(); 3531 else { 3532 mr = _cm->region_stack_pop_lock_free(); 3533 // it returns MemRegion() if the pop fails 3534 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 3535 } 3536 } else { 3537 assert(has_aborted(), "currently the only way to do so"); 3538 3539 // The only way to abort the bitmap iteration is to return 3540 // false from the do_bit() method. However, inside the 3541 // do_bit() method we move the _region_finger to point to the 3542 // object currently being looked at. So, if we bail out, we 3543 // have definitely set _region_finger to something non-null. 3544 assert(_region_finger != NULL, "invariant"); 3545 3546 // Make sure that any previously aborted region has been 3547 // cleared. 3548 assert(_aborted_region.is_empty(), "aborted region not cleared"); 3549 3550 // The iteration was actually aborted. So now _region_finger 3551 // points to the address of the object we last scanned. If we 3552 // leave it there, when we restart this task, we will rescan 3553 // the object. It is easy to avoid this. We move the finger by 3554 // enough to point to the next possible object header (the 3555 // bitmap knows by how much we need to move it as it knows its 3556 // granularity). 3557 MemRegion newRegion = 3558 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); 3559 3560 if (!newRegion.is_empty()) { 3561 if (_cm->verbose_low()) { 3562 gclog_or_tty->print_cr("[%d] recording unscanned region" 3563 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask", 3564 _task_id, 3565 newRegion.start(), newRegion.end()); 3566 } 3567 // Now record the part of the region we didn't scan to 3568 // make sure this task scans it later. 3569 _aborted_region = newRegion; 3570 } 3571 // break from while 3572 mr = MemRegion(); 3573 } 3574 _region_finger = NULL; 3575 } 3576 3577 if (_cm->verbose_low()) 3578 gclog_or_tty->print_cr("[%d] drained region stack, size = %d", 3579 _task_id, _cm->region_stack_size()); 3580 } 3581 } 3582 3583 void CMTask::print_stats() { 3584 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 3585 _task_id, _calls); 3586 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3587 _elapsed_time_ms, _termination_time_ms); 3588 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3589 _step_times_ms.num(), _step_times_ms.avg(), 3590 _step_times_ms.sd()); 3591 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3592 _step_times_ms.maximum(), _step_times_ms.sum()); 3593 3594 #if _MARKING_STATS_ 3595 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3596 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3597 _all_clock_intervals_ms.sd()); 3598 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3599 _all_clock_intervals_ms.maximum(), 3600 _all_clock_intervals_ms.sum()); 3601 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3602 _clock_due_to_scanning, _clock_due_to_marking); 3603 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3604 _objs_scanned, _objs_found_on_bitmap); 3605 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3606 _local_pushes, _local_pops, _local_max_size); 3607 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3608 _global_pushes, _global_pops, _global_max_size); 3609 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3610 _global_transfers_to,_global_transfers_from); 3611 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", 3612 _regions_claimed, _region_stack_pops); 3613 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3614 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3615 _steal_attempts, _steals); 3616 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3617 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3618 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3619 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3620 _aborted_timed_out, _aborted_satb, _aborted_termination); 3621 #endif // _MARKING_STATS_ 3622 } 3623 3624 /***************************************************************************** 3625 3626 The do_marking_step(time_target_ms) method is the building block 3627 of the parallel marking framework. It can be called in parallel 3628 with other invocations of do_marking_step() on different tasks 3629 (but only one per task, obviously) and concurrently with the 3630 mutator threads, or during remark, hence it eliminates the need 3631 for two versions of the code. When called during remark, it will 3632 pick up from where the task left off during the concurrent marking 3633 phase. Interestingly, tasks are also claimable during evacuation 3634 pauses too, since do_marking_step() ensures that it aborts before 3635 it needs to yield. 3636 3637 The data structures that is uses to do marking work are the 3638 following: 3639 3640 (1) Marking Bitmap. If there are gray objects that appear only 3641 on the bitmap (this happens either when dealing with an overflow 3642 or when the initial marking phase has simply marked the roots 3643 and didn't push them on the stack), then tasks claim heap 3644 regions whose bitmap they then scan to find gray objects. A 3645 global finger indicates where the end of the last claimed region 3646 is. A local finger indicates how far into the region a task has 3647 scanned. The two fingers are used to determine how to gray an 3648 object (i.e. whether simply marking it is OK, as it will be 3649 visited by a task in the future, or whether it needs to be also 3650 pushed on a stack). 3651 3652 (2) Local Queue. The local queue of the task which is accessed 3653 reasonably efficiently by the task. Other tasks can steal from 3654 it when they run out of work. Throughout the marking phase, a 3655 task attempts to keep its local queue short but not totally 3656 empty, so that entries are available for stealing by other 3657 tasks. Only when there is no more work, a task will totally 3658 drain its local queue. 3659 3660 (3) Global Mark Stack. This handles local queue overflow. During 3661 marking only sets of entries are moved between it and the local 3662 queues, as access to it requires a mutex and more fine-grain 3663 interaction with it which might cause contention. If it 3664 overflows, then the marking phase should restart and iterate 3665 over the bitmap to identify gray objects. Throughout the marking 3666 phase, tasks attempt to keep the global mark stack at a small 3667 length but not totally empty, so that entries are available for 3668 popping by other tasks. Only when there is no more work, tasks 3669 will totally drain the global mark stack. 3670 3671 (4) Global Region Stack. Entries on it correspond to areas of 3672 the bitmap that need to be scanned since they contain gray 3673 objects. Pushes on the region stack only happen during 3674 evacuation pauses and typically correspond to areas covered by 3675 GC LABS. If it overflows, then the marking phase should restart 3676 and iterate over the bitmap to identify gray objects. Tasks will 3677 try to totally drain the region stack as soon as possible. 3678 3679 (5) SATB Buffer Queue. This is where completed SATB buffers are 3680 made available. Buffers are regularly removed from this queue 3681 and scanned for roots, so that the queue doesn't get too 3682 long. During remark, all completed buffers are processed, as 3683 well as the filled in parts of any uncompleted buffers. 3684 3685 The do_marking_step() method tries to abort when the time target 3686 has been reached. There are a few other cases when the 3687 do_marking_step() method also aborts: 3688 3689 (1) When the marking phase has been aborted (after a Full GC). 3690 3691 (2) When a global overflow (either on the global stack or the 3692 region stack) has been triggered. Before the task aborts, it 3693 will actually sync up with the other tasks to ensure that all 3694 the marking data structures (local queues, stacks, fingers etc.) 3695 are re-initialised so that when do_marking_step() completes, 3696 the marking phase can immediately restart. 3697 3698 (3) When enough completed SATB buffers are available. The 3699 do_marking_step() method only tries to drain SATB buffers right 3700 at the beginning. So, if enough buffers are available, the 3701 marking step aborts and the SATB buffers are processed at 3702 the beginning of the next invocation. 3703 3704 (4) To yield. when we have to yield then we abort and yield 3705 right at the end of do_marking_step(). This saves us from a lot 3706 of hassle as, by yielding we might allow a Full GC. If this 3707 happens then objects will be compacted underneath our feet, the 3708 heap might shrink, etc. We save checking for this by just 3709 aborting and doing the yield right at the end. 3710 3711 From the above it follows that the do_marking_step() method should 3712 be called in a loop (or, otherwise, regularly) until it completes. 3713 3714 If a marking step completes without its has_aborted() flag being 3715 true, it means it has completed the current marking phase (and 3716 also all other marking tasks have done so and have all synced up). 3717 3718 A method called regular_clock_call() is invoked "regularly" (in 3719 sub ms intervals) throughout marking. It is this clock method that 3720 checks all the abort conditions which were mentioned above and 3721 decides when the task should abort. A work-based scheme is used to 3722 trigger this clock method: when the number of object words the 3723 marking phase has scanned or the number of references the marking 3724 phase has visited reach a given limit. Additional invocations to 3725 the method clock have been planted in a few other strategic places 3726 too. The initial reason for the clock method was to avoid calling 3727 vtime too regularly, as it is quite expensive. So, once it was in 3728 place, it was natural to piggy-back all the other conditions on it 3729 too and not constantly check them throughout the code. 3730 3731 *****************************************************************************/ 3732 3733 void CMTask::do_marking_step(double time_target_ms) { 3734 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 3735 assert(concurrent() == _cm->concurrent(), "they should be the same"); 3736 3737 assert(concurrent() || _cm->region_stack_empty(), 3738 "the region stack should have been cleared before remark"); 3739 assert(concurrent() || !_cm->has_aborted_regions(), 3740 "aborted regions should have been cleared before remark"); 3741 assert(_region_finger == NULL, 3742 "this should be non-null only when a region is being scanned"); 3743 3744 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 3745 assert(_task_queues != NULL, "invariant"); 3746 assert(_task_queue != NULL, "invariant"); 3747 assert(_task_queues->queue(_task_id) == _task_queue, "invariant"); 3748 3749 assert(!_claimed, 3750 "only one thread should claim this task at any one time"); 3751 3752 // OK, this doesn't safeguard again all possible scenarios, as it is 3753 // possible for two threads to set the _claimed flag at the same 3754 // time. But it is only for debugging purposes anyway and it will 3755 // catch most problems. 3756 _claimed = true; 3757 3758 _start_time_ms = os::elapsedVTime() * 1000.0; 3759 statsOnly( _interval_start_time_ms = _start_time_ms ); 3760 3761 double diff_prediction_ms = 3762 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 3763 _time_target_ms = time_target_ms - diff_prediction_ms; 3764 3765 // set up the variables that are used in the work-based scheme to 3766 // call the regular clock method 3767 _words_scanned = 0; 3768 _refs_reached = 0; 3769 recalculate_limits(); 3770 3771 // clear all flags 3772 clear_has_aborted(); 3773 _has_aborted_timed_out = false; 3774 _draining_satb_buffers = false; 3775 3776 ++_calls; 3777 3778 if (_cm->verbose_low()) 3779 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 3780 "target = %1.2lfms >>>>>>>>>>", 3781 _task_id, _calls, _time_target_ms); 3782 3783 // Set up the bitmap and oop closures. Anything that uses them is 3784 // eventually called from this method, so it is OK to allocate these 3785 // statically. 3786 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 3787 CMOopClosure oop_closure(_g1h, _cm, this); 3788 set_oop_closure(&oop_closure); 3789 3790 if (_cm->has_overflown()) { 3791 // This can happen if the region stack or the mark stack overflows 3792 // during a GC pause and this task, after a yield point, 3793 // restarts. We have to abort as we need to get into the overflow 3794 // protocol which happens right at the end of this task. 3795 set_has_aborted(); 3796 } 3797 3798 // First drain any available SATB buffers. After this, we will not 3799 // look at SATB buffers before the next invocation of this method. 3800 // If enough completed SATB buffers are queued up, the regular clock 3801 // will abort this task so that it restarts. 3802 drain_satb_buffers(); 3803 // ...then partially drain the local queue and the global stack 3804 drain_local_queue(true); 3805 drain_global_stack(true); 3806 3807 // Then totally drain the region stack. We will not look at 3808 // it again before the next invocation of this method. Entries on 3809 // the region stack are only added during evacuation pauses, for 3810 // which we have to yield. When we do, we abort the task anyway so 3811 // it will look at the region stack again when it restarts. 3812 bitmap_closure.set_scanning_heap_region(false); 3813 drain_region_stack(&bitmap_closure); 3814 // ...then partially drain the local queue and the global stack 3815 drain_local_queue(true); 3816 drain_global_stack(true); 3817 3818 do { 3819 if (!has_aborted() && _curr_region != NULL) { 3820 // This means that we're already holding on to a region. 3821 assert(_finger != NULL, "if region is not NULL, then the finger " 3822 "should not be NULL either"); 3823 3824 // We might have restarted this task after an evacuation pause 3825 // which might have evacuated the region we're holding on to 3826 // underneath our feet. Let's read its limit again to make sure 3827 // that we do not iterate over a region of the heap that 3828 // contains garbage (update_region_limit() will also move 3829 // _finger to the start of the region if it is found empty). 3830 update_region_limit(); 3831 // We will start from _finger not from the start of the region, 3832 // as we might be restarting this task after aborting half-way 3833 // through scanning this region. In this case, _finger points to 3834 // the address where we last found a marked object. If this is a 3835 // fresh region, _finger points to start(). 3836 MemRegion mr = MemRegion(_finger, _region_limit); 3837 3838 if (_cm->verbose_low()) 3839 gclog_or_tty->print_cr("[%d] we're scanning part " 3840 "["PTR_FORMAT", "PTR_FORMAT") " 3841 "of region "PTR_FORMAT, 3842 _task_id, _finger, _region_limit, _curr_region); 3843 3844 // Let's iterate over the bitmap of the part of the 3845 // region that is left. 3846 bitmap_closure.set_scanning_heap_region(true); 3847 if (mr.is_empty() || 3848 _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3849 // We successfully completed iterating over the region. Now, 3850 // let's give up the region. 3851 giveup_current_region(); 3852 regular_clock_call(); 3853 } else { 3854 assert(has_aborted(), "currently the only way to do so"); 3855 // The only way to abort the bitmap iteration is to return 3856 // false from the do_bit() method. However, inside the 3857 // do_bit() method we move the _finger to point to the 3858 // object currently being looked at. So, if we bail out, we 3859 // have definitely set _finger to something non-null. 3860 assert(_finger != NULL, "invariant"); 3861 3862 // Region iteration was actually aborted. So now _finger 3863 // points to the address of the object we last scanned. If we 3864 // leave it there, when we restart this task, we will rescan 3865 // the object. It is easy to avoid this. We move the finger by 3866 // enough to point to the next possible object header (the 3867 // bitmap knows by how much we need to move it as it knows its 3868 // granularity). 3869 assert(_finger < _region_limit, "invariant"); 3870 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 3871 // Check if bitmap iteration was aborted while scanning the last object 3872 if (new_finger >= _region_limit) { 3873 giveup_current_region(); 3874 } else { 3875 move_finger_to(new_finger); 3876 } 3877 } 3878 } 3879 // At this point we have either completed iterating over the 3880 // region we were holding on to, or we have aborted. 3881 3882 // We then partially drain the local queue and the global stack. 3883 // (Do we really need this?) 3884 drain_local_queue(true); 3885 drain_global_stack(true); 3886 3887 // Read the note on the claim_region() method on why it might 3888 // return NULL with potentially more regions available for 3889 // claiming and why we have to check out_of_regions() to determine 3890 // whether we're done or not. 3891 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 3892 // We are going to try to claim a new region. We should have 3893 // given up on the previous one. 3894 // Separated the asserts so that we know which one fires. 3895 assert(_curr_region == NULL, "invariant"); 3896 assert(_finger == NULL, "invariant"); 3897 assert(_region_limit == NULL, "invariant"); 3898 if (_cm->verbose_low()) 3899 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 3900 HeapRegion* claimed_region = _cm->claim_region(_task_id); 3901 if (claimed_region != NULL) { 3902 // Yes, we managed to claim one 3903 statsOnly( ++_regions_claimed ); 3904 3905 if (_cm->verbose_low()) 3906 gclog_or_tty->print_cr("[%d] we successfully claimed " 3907 "region "PTR_FORMAT, 3908 _task_id, claimed_region); 3909 3910 setup_for_region(claimed_region); 3911 assert(_curr_region == claimed_region, "invariant"); 3912 } 3913 // It is important to call the regular clock here. It might take 3914 // a while to claim a region if, for example, we hit a large 3915 // block of empty regions. So we need to call the regular clock 3916 // method once round the loop to make sure it's called 3917 // frequently enough. 3918 regular_clock_call(); 3919 } 3920 3921 if (!has_aborted() && _curr_region == NULL) { 3922 assert(_cm->out_of_regions(), 3923 "at this point we should be out of regions"); 3924 } 3925 } while ( _curr_region != NULL && !has_aborted()); 3926 3927 if (!has_aborted()) { 3928 // We cannot check whether the global stack is empty, since other 3929 // tasks might be pushing objects to it concurrently. We also cannot 3930 // check if the region stack is empty because if a thread is aborting 3931 // it can push a partially done region back. 3932 assert(_cm->out_of_regions(), 3933 "at this point we should be out of regions"); 3934 3935 if (_cm->verbose_low()) 3936 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); 3937 3938 // Try to reduce the number of available SATB buffers so that 3939 // remark has less work to do. 3940 drain_satb_buffers(); 3941 } 3942 3943 // Since we've done everything else, we can now totally drain the 3944 // local queue and global stack. 3945 drain_local_queue(false); 3946 drain_global_stack(false); 3947 3948 // Attempt at work stealing from other task's queues. 3949 if (!has_aborted()) { 3950 // We have not aborted. This means that we have finished all that 3951 // we could. Let's try to do some stealing... 3952 3953 // We cannot check whether the global stack is empty, since other 3954 // tasks might be pushing objects to it concurrently. We also cannot 3955 // check if the region stack is empty because if a thread is aborting 3956 // it can push a partially done region back. 3957 assert(_cm->out_of_regions() && _task_queue->size() == 0, 3958 "only way to reach here"); 3959 3960 if (_cm->verbose_low()) 3961 gclog_or_tty->print_cr("[%d] starting to steal", _task_id); 3962 3963 while (!has_aborted()) { 3964 oop obj; 3965 statsOnly( ++_steal_attempts ); 3966 3967 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { 3968 if (_cm->verbose_medium()) 3969 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", 3970 _task_id, (void*) obj); 3971 3972 statsOnly( ++_steals ); 3973 3974 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 3975 "any stolen object should be marked"); 3976 scan_object(obj); 3977 3978 // And since we're towards the end, let's totally drain the 3979 // local queue and global stack. 3980 drain_local_queue(false); 3981 drain_global_stack(false); 3982 } else { 3983 break; 3984 } 3985 } 3986 } 3987 3988 // We still haven't aborted. Now, let's try to get into the 3989 // termination protocol. 3990 if (!has_aborted()) { 3991 // We cannot check whether the global stack is empty, since other 3992 // tasks might be concurrently pushing objects on it. We also cannot 3993 // check if the region stack is empty because if a thread is aborting 3994 // it can push a partially done region back. 3995 // Separated the asserts so that we know which one fires. 3996 assert(_cm->out_of_regions(), "only way to reach here"); 3997 assert(_task_queue->size() == 0, "only way to reach here"); 3998 3999 if (_cm->verbose_low()) 4000 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); 4001 4002 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4003 // The CMTask class also extends the TerminatorTerminator class, 4004 // hence its should_exit_termination() method will also decide 4005 // whether to exit the termination protocol or not. 4006 bool finished = _cm->terminator()->offer_termination(this); 4007 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4008 _termination_time_ms += 4009 termination_end_time_ms - _termination_start_time_ms; 4010 4011 if (finished) { 4012 // We're all done. 4013 4014 if (_task_id == 0) { 4015 // let's allow task 0 to do this 4016 if (concurrent()) { 4017 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4018 // we need to set this to false before the next 4019 // safepoint. This way we ensure that the marking phase 4020 // doesn't observe any more heap expansions. 4021 _cm->clear_concurrent_marking_in_progress(); 4022 } 4023 } 4024 4025 // We can now guarantee that the global stack is empty, since 4026 // all other tasks have finished. We separated the guarantees so 4027 // that, if a condition is false, we can immediately find out 4028 // which one. 4029 guarantee(_cm->out_of_regions(), "only way to reach here"); 4030 guarantee(_aborted_region.is_empty(), "only way to reach here"); 4031 guarantee(_cm->region_stack_empty(), "only way to reach here"); 4032 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4033 guarantee(_task_queue->size() == 0, "only way to reach here"); 4034 guarantee(!_cm->has_overflown(), "only way to reach here"); 4035 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4036 guarantee(!_cm->region_stack_overflow(), "only way to reach here"); 4037 4038 if (_cm->verbose_low()) 4039 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4040 } else { 4041 // Apparently there's more work to do. Let's abort this task. It 4042 // will restart it and we can hopefully find more things to do. 4043 4044 if (_cm->verbose_low()) 4045 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id); 4046 4047 set_has_aborted(); 4048 statsOnly( ++_aborted_termination ); 4049 } 4050 } 4051 4052 // Mainly for debugging purposes to make sure that a pointer to the 4053 // closure which was statically allocated in this frame doesn't 4054 // escape it by accident. 4055 set_oop_closure(NULL); 4056 double end_time_ms = os::elapsedVTime() * 1000.0; 4057 double elapsed_time_ms = end_time_ms - _start_time_ms; 4058 // Update the step history. 4059 _step_times_ms.add(elapsed_time_ms); 4060 4061 if (has_aborted()) { 4062 // The task was aborted for some reason. 4063 4064 statsOnly( ++_aborted ); 4065 4066 if (_has_aborted_timed_out) { 4067 double diff_ms = elapsed_time_ms - _time_target_ms; 4068 // Keep statistics of how well we did with respect to hitting 4069 // our target only if we actually timed out (if we aborted for 4070 // other reasons, then the results might get skewed). 4071 _marking_step_diffs_ms.add(diff_ms); 4072 } 4073 4074 if (_cm->has_overflown()) { 4075 // This is the interesting one. We aborted because a global 4076 // overflow was raised. This means we have to restart the 4077 // marking phase and start iterating over regions. However, in 4078 // order to do this we have to make sure that all tasks stop 4079 // what they are doing and re-initialise in a safe manner. We 4080 // will achieve this with the use of two barrier sync points. 4081 4082 if (_cm->verbose_low()) 4083 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4084 4085 _cm->enter_first_sync_barrier(_task_id); 4086 // When we exit this sync barrier we know that all tasks have 4087 // stopped doing marking work. So, it's now safe to 4088 // re-initialise our data structures. At the end of this method, 4089 // task 0 will clear the global data structures. 4090 4091 statsOnly( ++_aborted_overflow ); 4092 4093 // We clear the local state of this task... 4094 clear_region_fields(); 4095 4096 // ...and enter the second barrier. 4097 _cm->enter_second_sync_barrier(_task_id); 4098 // At this point everything has bee re-initialised and we're 4099 // ready to restart. 4100 } 4101 4102 if (_cm->verbose_low()) { 4103 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4104 "elapsed = %1.2lfms <<<<<<<<<<", 4105 _task_id, _time_target_ms, elapsed_time_ms); 4106 if (_cm->has_aborted()) 4107 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", 4108 _task_id); 4109 } 4110 } else { 4111 if (_cm->verbose_low()) 4112 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4113 "elapsed = %1.2lfms <<<<<<<<<<", 4114 _task_id, _time_target_ms, elapsed_time_ms); 4115 } 4116 4117 _claimed = false; 4118 } 4119 4120 CMTask::CMTask(int task_id, 4121 ConcurrentMark* cm, 4122 CMTaskQueue* task_queue, 4123 CMTaskQueueSet* task_queues) 4124 : _g1h(G1CollectedHeap::heap()), 4125 _task_id(task_id), _cm(cm), 4126 _claimed(false), 4127 _nextMarkBitMap(NULL), _hash_seed(17), 4128 _task_queue(task_queue), 4129 _task_queues(task_queues), 4130 _oop_closure(NULL), 4131 _aborted_region(MemRegion()) { 4132 guarantee(task_queue != NULL, "invariant"); 4133 guarantee(task_queues != NULL, "invariant"); 4134 4135 statsOnly( _clock_due_to_scanning = 0; 4136 _clock_due_to_marking = 0 ); 4137 4138 _marking_step_diffs_ms.add(0.5); 4139 }