/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  // Note: the result of intersection() must be assigned back; the call
  // itself does not modify mr.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

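// Note on the bit map granularity (an explanatory example, not from the
// original sources): each bit covers 2^_shifter heap words, where the
// shifter passed in by ConcurrentMark below is MinObjAlignment - 1. On a
// typical 64-bit build MinObjAlignment == 1, so _shifter == 0 and every
// heap word gets its own bit; a 1 GB heap (2^27 eight-byte words) then
// needs 2^27 bits == 16 MB of backing store per bit map, which matches
// the (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1 byte count
// reserved in the constructor above.
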
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically.  We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}


CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
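    // Sketch of the lock-free push protocol used here and in
    // CMMarkStack::par_push() above (an explanatory note, not original
    // code): a pusher claims slot `index` by CAS-ing _index from
    // index to index + 1; only the CAS winner stores into _base[index],
    // while losers observe the new _index and retry. The capacity check
    // above runs before the CAS, so a full stack sets _overflow instead
    // of writing past the end.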
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in the G1 heap.
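    // (Explanatory note: in tri-colour terms the pop() above removed a
    // grey object; oop_iterate() below visits its fields through `cl`,
    // which greys any unmarked referents, after which this object is
    // effectively black.)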
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                 (int) _parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when the marking tasks are
  // inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with the initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
  FREE_C_HEAP_ARRAY(double, _accum_task_vtime);
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    true /* use_prev_marking */, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

// This closure is used to mark refs into the G1 heap from external
// roots. Called at the first checkpoint.
class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
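// (Explanatory note: the "catch up" is provided by the SATB write
// barrier, which logs updates made while marking runs concurrently,
// and by the final remark pause, which drains those logs before
// marking is declared complete.)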
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,      // do not visit active blobs
                            &older     // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   did not do this, the following deadlock could occur: one thread
   could be in the barrier sync code, waiting for the other thread
   to also sync up, whereas another one could be trying to yield,
   while also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended while a Full GC or an evacuation
   pause occurs. This is actually safe, since entering the sync
   barrier is one of the last things do_marking_step() does, and it
   doesn't manipulate any data structures afterwards.
*/

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflowed.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
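    // (Explanatory note: do_marking_step(10.0) works in roughly 10ms
    // slices and "aborts" when its slice is used up; unless marking as
    // a whole has aborted, the loop above sleeps in proportion to
    // sleep_factor() and re-runs the task. So by this point either the
    // task ran to completion or the global abort flag is set, which is
    // what the guarantee below checks.)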
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
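    // (Explanatory note: the per-thread SATB "active" flag mirrors
    // whether a marking cycle is in progress. It was flipped to true
    // for all threads at the initial-mark pause; now that marking is
    // finished it is flipped back to false, with the old value passed
    // as expected_active so a mismatch is caught.)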
    JavaThread::satb_mark_queue_set().set_active_all_threads(
                                                  false, /* new active value */
                                                  true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}


#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm), _final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded.  It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set.  We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
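            // (Worked example, not from the original comments: for live
            // objects spanning card ranges [5,5], [5,6] and [9,9], the
            // first two coalesce into the run [5,6]; when card 9 is
            // seen, 9 - 6 > 1, so [5,6] is marked here and a new run
            // starts at 9.)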
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};


void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

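  // (Explanatory note: each worker writes only its own slot of
  // _live_bytes / _used_bytes in work() below, so no synchronization
  // is needed there; live_bytes() and used_bytes() simply sum the
  // per-worker slots once the gang has finished.)
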
  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  size_t _cleared_h_regions;
  size_t _freed_regions;
  UncleanRegionList* _unclean_region_list;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             UncleanRegionList* list,
                             int worker_num);
  size_t freed_bytes() { return _freed_bytes; }
  size_t cleared_h_regions() { return _cleared_h_regions; }
  size_t freed_regions() { return _freed_regions; }
  UncleanRegionList* unclean_region_list() {
    return _unclean_region_list;
  }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;
protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   ConcurrentMark::ParCleanupThreadState**
                   par_cleanup_thread_state) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0),
    _par_cleanup_thread_state(par_cleanup_thread_state)
  {}

  void work(int i) {
    double start = os::elapsedTime();
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now finish up freeing the current thread's regions.
    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
                                  g1_note_end.cleared_h_regions(),
                                  0, NULL);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           UncleanRegionList* list,
                           int worker_num)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _unclean_region_list(list)
{}

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  r->reset_gc_time_stamp();
  if (!r->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    r->note_end_of_marking();
    _max_live_bytes += r->max_live_bytes();
    _g1->free_region_if_totally_empty_work(r,
                                           _freed_bytes,
                                           _cleared_h_regions,
                                           _freed_regions,
                                           _unclean_region_list,
                                           true /*par*/);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty  */ true,
                     /* silent       */ false,
                     /* prev marking */ true);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  // Do counting once more with the world stopped for good measure.
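  // (Explanatory note: the check_heap_region_claim_values() asserts
  // around the parallel phases below verify that the chunked region
  // iteration visited every region exactly once -- each phase tags the
  // regions it claims with its own claim value.)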
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::InitialClaimValue),
           "sanity check");

    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_count_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(
                                             HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    g1_par_count_task.work(0);
  }

  size_t known_garbage_bytes =
    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
#if 0
  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
                 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
                 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
                 (double) known_garbage_bytes / (double) (1024 * 1024));
#endif // 0
  g1p->set_known_garbage_bytes(known_garbage_bytes);

  size_t start_used_bytes = g1h->used();
  _at_least_one_mark_complete = true;
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("Cleanup:");
    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
                           this_final_counting_time*1000.0);
  }
  _total_counting_time += this_final_counting_time;

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  // Tell the mutators that there might be unclean regions coming...
  g1h->set_unclean_regions_coming(true);
  double note_end_end = os::elapsedTime();
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
                           (note_end_end - note_end_start)*1000.0);
  }


  // If enabled, scrub the remembered sets before the policy
  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      int n_workers = g1h->workers()->total_workers();
      g1h->set_par_threads(n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end(
                                          g1_par_note_end_task.freed_bytes(),
                                          g1_par_note_end_task.max_live_bytes());

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // G1CollectedHeap::heap()->print();
  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  // G1CollectedHeap::heap()->get_gc_time_stamp());

  if (PrintGC || PrintGCDetails) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent */ false,
                     /* prev marking */ true);
  }
}

void ConcurrentMark::completeCleanup() {
  // A full collection intervened.
  if (has_aborted()) return;

  int first = 0;
  int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  for (int t = 0; t < last; t++) {
    UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
    assert(list->well_formed(), "Inv");
    HeapRegion* hd = list->hd();
    while (hd != NULL) {
      // Now finish up the other stuff.
      hd->rem_set()->clear();
      HeapRegion* next_hd = hd->next_from_unclean_list();
      (void)list->pop();
      assert(list->hd() == next_hd, "how not?");
      _g1h->put_region_on_unclean_list(hd);
      if (!hd->isHumongous()) {
        // This adds 1 to the _free_regions count.
        _g1h->finish_free_region_work(0, 0, 1, NULL);
      }
      hd = list->hd();
      assert(hd == next_hd, "how not?");
    }
  }
}


class G1CMIsAliveClosure: public BoolObjectClosure {
  G1CollectedHeap* _g1;
public:
  G1CMIsAliveClosure(G1CollectedHeap* g1) :
    _g1(g1)
  {}

  void do_object(oop obj) {
    assert(false, "not to be invoked");
  }
  bool do_object_b(oop obj) {
    HeapWord* addr = (HeapWord*)obj;
    return addr != NULL &&
           (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  }
};

class G1CMKeepAliveClosure: public OopClosure {
  G1CollectedHeap* _g1;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitMap;
public:
  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                       CMBitMap* bitMap) :
    _g1(g1), _cm(cm),
    _bitMap(bitMap) {}

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop thisOop = oopDesc::load_decode_heap_oop(p);
    HeapWord* addr = (HeapWord*)thisOop;
    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
      _bitMap->mark(addr);
      _cm->mark_stack_push(thisOop);
    }
  }
};

class G1CMDrainMarkingStackClosure: public VoidClosure {
  CMMarkStack*          _markStack;
  CMBitMap*             _bitMap;
  G1CMKeepAliveClosure* _oopClosure;
public:
  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
                               G1CMKeepAliveClosure* oopClosure) :
    // initializers match declaration order to avoid a reorder warning
    _markStack(markStack),
    _bitMap(bitMap),
    _oopClosure(oopClosure)
  {}

  void do_void() {
    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  }
};

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ReferenceProcessor* rp = g1h->ref_processor();

  // Process weak references.
  rp->setup_policy(clear_all_soft_refs);
  assert(_markStack.isEmpty(), "mark stack should be empty");

  G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  G1CMDrainMarkingStackClosure
    g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
                               &g1KeepAliveClosure);

  // XXXYYY  Also: copy the parallel ref processing code from CMS.
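  // The three closures just created drive reference processing below:
  // g1IsAliveClosure is the liveness predicate for referents,
  // g1KeepAliveClosure marks referents that must be kept alive and
  // pushes them on the mark stack, and g1DrainMarkingStackClosure
  // drains that stack after each batch.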
  rp->process_discovered_references(&g1IsAliveClosure,
                                    &g1KeepAliveClosure,
                                    &g1DrainMarkingStackClosure,
                                    NULL);
  assert(_markStack.overflow() || _markStack.isEmpty(),
         "mark stack should be empty (unless it overflowed)");
  if (_markStack.overflow()) {
    set_has_overflown();
  }

  rp->enqueue_discovered_references();
  rp->verify_no_references_recorded();
  assert(!rp->discovery_enabled(), "should have been disabled");

  // Now clean up stale oops in SymbolTable and StringTable
  SymbolTable::unlink(&g1IsAliveClosure);
  StringTable::unlink(&g1IsAliveClosure);
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark *_cm;

public:
  void work(int worker_i) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if ((size_t)worker_i < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_i);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm) :
    AbstractGangTask("Par Remark"), _cm(cm) { }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = ParallelGCThreads;
    set_phase(active_workers, false);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = 1;
    set_phase(active_workers, false);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

  if (!restart_for_overflow())
    set_non_marking_state();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  CMBitMapRO*      _bitmap;
  outputStream*    _out;
  bool             _use_prev_marking;
  bool             _all;

public:
  PrintReachableOopClosure(CMBitMapRO*   bitmap,
                           outputStream* out,
                           bool          use_prev_marking,
                           bool          all) :
    _g1h(G1CollectedHeap::heap()),
    _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = false;
      if (_use_prev_marking) {
        over_tams = hr->obj_allocated_since_prev_marking(obj);
      } else {
        over_tams = hr->obj_allocated_since_next_marking(obj);
      }
      bool marked = _bitmap->isMarked((HeapWord*) obj);

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  CMBitMapRO*   _bitmap;
  outputStream* _out;
  bool          _use_prev_marking;
  bool          _all;
  HeapRegion*   _hr;

public:
  PrintReachableObjectClosure(CMBitMapRO*   bitmap,
                              outputStream* out,
                              bool          use_prev_marking,
                              bool          all,
                              HeapRegion*   hr) :
    _bitmap(bitmap), _out(out),
    _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams;
    if (_use_prev_marking) {
      over_tams = _hr->obj_allocated_since_prev_marking(o);
    } else {
      over_tams = _hr->obj_allocated_since_next_marking(o);
    }
    bool marked = _bitmap->isMarked((HeapWord*) o);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ? " M" : "");
" M" : ""); 2079 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all); 2080 o->oop_iterate(&oopCl); 2081 } 2082 } 2083 }; 2084 2085 class PrintReachableRegionClosure : public HeapRegionClosure { 2086 private: 2087 CMBitMapRO* _bitmap; 2088 outputStream* _out; 2089 bool _use_prev_marking; 2090 bool _all; 2091 2092 public: 2093 bool doHeapRegion(HeapRegion* hr) { 2094 HeapWord* b = hr->bottom(); 2095 HeapWord* e = hr->end(); 2096 HeapWord* t = hr->top(); 2097 HeapWord* p = NULL; 2098 if (_use_prev_marking) { 2099 p = hr->prev_top_at_mark_start(); 2100 } else { 2101 p = hr->next_top_at_mark_start(); 2102 } 2103 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2104 "TAMS: "PTR_FORMAT, b, e, t, p); 2105 _out->cr(); 2106 2107 HeapWord* from = b; 2108 HeapWord* to = t; 2109 2110 if (to > from) { 2111 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2112 _out->cr(); 2113 PrintReachableObjectClosure ocl(_bitmap, _out, 2114 _use_prev_marking, _all, hr); 2115 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2116 _out->cr(); 2117 } 2118 2119 return false; 2120 } 2121 2122 PrintReachableRegionClosure(CMBitMapRO* bitmap, 2123 outputStream* out, 2124 bool use_prev_marking, 2125 bool all) : 2126 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { } 2127 }; 2128 2129 void ConcurrentMark::print_reachable(const char* str, 2130 bool use_prev_marking, 2131 bool all) { 2132 gclog_or_tty->cr(); 2133 gclog_or_tty->print_cr("== Doing heap dump... "); 2134 2135 if (G1PrintReachableBaseFile == NULL) { 2136 gclog_or_tty->print_cr(" #### error: no base file defined"); 2137 return; 2138 } 2139 2140 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2141 (JVM_MAXPATHLEN - 1)) { 2142 gclog_or_tty->print_cr(" #### error: file name too long"); 2143 return; 2144 } 2145 2146 char file_name[JVM_MAXPATHLEN]; 2147 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2148 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2149 2150 fileStream fout(file_name); 2151 if (!fout.is_open()) { 2152 gclog_or_tty->print_cr(" #### error: could not open file"); 2153 return; 2154 } 2155 2156 outputStream* out = &fout; 2157 2158 CMBitMapRO* bitmap = NULL; 2159 if (use_prev_marking) { 2160 bitmap = _prevMarkBitMap; 2161 } else { 2162 bitmap = _nextMarkBitMap; 2163 } 2164 2165 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS"); 2166 out->cr(); 2167 2168 out->print_cr("--- ITERATING OVER REGIONS"); 2169 out->cr(); 2170 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all); 2171 _g1h->heap_region_iterate(&rcl); 2172 out->cr(); 2173 2174 gclog_or_tty->print_cr(" done"); 2175 gclog_or_tty->flush(); 2176 } 2177 2178 #endif // PRODUCT 2179 2180 // This note is for drainAllSATBBuffers and the code in between. 2181 // In the future we could reuse a task to do this work during an 2182 // evacuation pause (since now tasks are not active and can be claimed 2183 // during an evacuation pause). This was a late change to the code and 2184 // is currently not being taken advantage of. 

class CMGlobalObjectClosure : public ObjectClosure {
private:
  ConcurrentMark* _cm;

public:
  void do_object(oop obj) {
    _cm->deal_with_reference(obj);
  }

  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
};

void ConcurrentMark::deal_with_reference(oop obj) {
  if (verbose_high())
    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
                           (void*) obj);

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (verbose_high())
        gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
                               "marked", (void*) obj);

      // we need to mark it first
      if (_nextMarkBitMap->parMark(objAddr)) {
        // No OrderAccess::store_load() is needed. It is implicit in the
        // CAS done in parMark(objAddr) above
        HeapWord* finger = _finger;
        if (objAddr < finger) {
          if (verbose_high())
            gclog_or_tty->print_cr("[global] below the global finger "
                                   "("PTR_FORMAT"), pushing it", finger);
          if (!mark_stack_push(obj)) {
            if (verbose_low())
              gclog_or_tty->print_cr("[global] global stack overflow during "
                                     "deal_with_reference");
          }
        }
      }
    }
  }
}

void ConcurrentMark::drainAllSATBBuffers() {
  CMGlobalObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_closure(&oc);

  while (satb_mq_set.apply_closure_to_completed_buffer()) {
    if (verbose_medium())
      gclog_or_tty->print_cr("[global] processed an SATB buffer");
  }

  // no need to check whether we should do this, as this is only
  // called during an evacuation pause
  satb_mq_set.iterate_closure_all_threads();

  satb_mq_set.set_closure(NULL);
  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
}

void ConcurrentMark::markPrev(oop p) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
}

void ConcurrentMark::clear(oop p) {
  assert(p != NULL && p->is_oop(), "expected an oop");
  HeapWord* addr = (HeapWord*)p;
  assert(addr >= _nextMarkBitMap->startWord() &&
         addr < _nextMarkBitMap->endWord(), "in a region");

  _nextMarkBitMap->clear(addr);
}

void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  _nextMarkBitMap->clearRange(mr);
}

HeapRegion*
ConcurrentMark::claim_region(int task_num) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // is the gap between reading the finger and doing the CAS too long?
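    // (If it is, the CAS below simply fails; we then re-read the
    // global finger and retry.)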

    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
    HeapWord*   bottom      = curr_region->bottom();
    HeapWord*   end         = curr_region->end();
    HeapWord*   limit       = curr_region->next_top_at_mark_start();

    if (verbose_low())
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);

    HeapWord* res =
      (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low())
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);

      if (limit > bottom) {
        if (verbose_low())
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low())
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low())
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               task_num, _finger, finger);

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  bool result = false;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        the_task->set_aborted_region(MemRegion());
        result = true;
      }
    }
  }
  return result;
}

bool ConcurrentMark::has_aborted_regions() {
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return true;
    }
  }
  return false;
}

void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_markStack.size() > 0 && verbose_low())
    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
                           "size = %d", _markStack.size());
  // we first iterate over the contents of the mark stack...
  _markStack.oops_do(cl);

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue((int)i);

    if (queue->size() > 0 && verbose_low())
      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
                             "size = %d", i, queue->size());

    // ...then over the contents of all the task queues.
    queue->oops_do(cl);
  }

  // Invalidate any entries in the region stack that point into the
  // collection set.
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

  // Invalidate any aborted regions, recorded in the individual CM
  // tasks, that point into the collection set.
  if (invalidate_aborted_regions_in_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

}

void ConcurrentMark::clear_marking_state() {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  _regionStack.setEmpty();
  _regionStack.clear_overflow();
  clear_has_overflown();
  _finger = _heap_start;

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

class CSMarkOopClosure: public OopClosure {
  friend class CSMarkBitMapClosure;

  G1CollectedHeap* _g1h;
  CMBitMap*        _bm;
  ConcurrentMark*  _cm;
  oop*             _ms;
  jint*            _array_ind_stack;
  int              _ms_size;
  int              _ms_ind;
  int              _array_increment;

  bool push(oop obj, int arr_ind = 0) {
    if (_ms_ind == _ms_size) {
      gclog_or_tty->print_cr("Mark stack is full.");
      return false;
    }
    _ms[_ms_ind] = obj;
    if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
    _ms_ind++;
    return true;
  }

  oop pop() {
    if (_ms_ind == 0) return NULL;
    else {
      _ms_ind--;
      return _ms[_ms_ind];
    }
  }

  template <class T> bool drain() {
    while (_ms_ind > 0) {
      oop obj = pop();
      assert(obj != NULL, "Since index was non-zero.");
      if (obj->is_objArray()) {
        jint arr_ind = _array_ind_stack[_ms_ind];
        objArrayOop aobj = objArrayOop(obj);
        jint len = aobj->length();
        jint next_arr_ind = arr_ind + _array_increment;
        if (next_arr_ind < len) {
          push(obj, next_arr_ind);
        }
        // Now process this portion of this one.
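        // i.e. scan the index range [arr_ind, MIN2(next_arr_ind, len)),
        // so a large object array is processed in _array_increment-sized
        // chunks rather than all at once.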
        int lim = MIN2(next_arr_ind, len);
        for (int j = arr_ind; j < lim; j++) {
          do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
        }

      } else {
        obj->oop_iterate(this);
      }
      if (abort()) return false;
    }
    return true;
  }

public:
  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
    _g1h(G1CollectedHeap::heap()),
    _bm(cm->nextMarkBitMap()),
    _cm(cm),
    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
    _ms_size(ms_size), _ms_ind(0),
    _array_increment(MAX2(ms_size/8, 16))
  {}

  ~CSMarkOopClosure() {
    FREE_C_HEAP_ARRAY(oop, _ms);
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (oopDesc::is_null(heap_oop)) return;
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked. So follow the forwarding pointer. Note that
      // this does the right thing for self-forwarding pointers in the
      // evacuation failure case.
      obj = obj->forwardee();
    }
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (hr != NULL) {
      if (hr->in_collection_set()) {
        if (_g1h->is_obj_ill(obj)) {
          _bm->mark((HeapWord*)obj);
          if (!push(obj)) {
            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
            set_abort();
          }
        }
      } else {
        // Outside the collection set; we need to gray it
        _cm->deal_with_reference(obj);
      }
    }
  }
};

class CSMarkBitMapClosure: public BitMapClosure {
  G1CollectedHeap* _g1h;
  CMBitMap*        _bitMap;
  ConcurrentMark*  _cm;
  CSMarkOopClosure _oop_cl;
public:
  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
    _g1h(G1CollectedHeap::heap()),
    _bitMap(cm->nextMarkBitMap()),
    _oop_cl(cm, ms_size)
  {}

  ~CSMarkBitMapClosure() {}

  bool do_bit(size_t offset) {
    // convert offset into a HeapWord*
    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
    assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
           "address out of range");
    assert(_bitMap->isMarked(addr), "tautology");
    oop obj = oop(addr);
    if (!obj->is_forwarded()) {
      if (!_oop_cl.push(obj)) return false;
      if (UseCompressedOops) {
        if (!_oop_cl.drain<narrowOop>()) return false;
      } else {
        if (!_oop_cl.drain<oop>()) return false;
      }
    }
    // Otherwise...
    return true;
  }
};


class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  CMBitMap* _bm;
  CSMarkBitMapClosure _bit_cl;
  enum SomePrivateConstants {
    MSSize = 1000
  };
  bool _completed;
public:
  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
    _bm(cm->nextMarkBitMap()),
    _bit_cl(cm, MSSize),
    _completed(true)
  {}

  ~CompleteMarkingInCSHRClosure() {}

  bool doHeapRegion(HeapRegion* r) {
    if (!r->evacuation_failed()) {
      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
      if (!mr.is_empty()) {
        if (!_bm->iterate(&_bit_cl, mr)) {
          _completed = false;
          return true;
        }
      }
    }
    return false;
  }

  bool completed() { return _completed; }
};

class ClearMarksInHRClosure: public HeapRegionClosure {
  CMBitMap* _bm;
public:
  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }

  bool doHeapRegion(HeapRegion* r) {
    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
      MemRegion usedMR = r->used_region();
      _bm->clearRange(usedMR);
    }
    return false;
  }
};

void ConcurrentMark::complete_marking_in_collection_set() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  if (!g1h->mark_in_progress()) {
    g1h->g1_policy()->record_mark_closure_time(0.0);
    return;
  }

  int i = 1;
  double start = os::elapsedTime();
  while (true) {
    i++;
    CompleteMarkingInCSHRClosure cmplt(this);
    g1h->collection_set_iterate(&cmplt);
    if (cmplt.completed()) break;
  }
  double end_time = os::elapsedTime();
  double elapsed_time_ms = (end_time - start) * 1000.0;
  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);

  ClearMarksInHRClosure clr(nextMarkBitMap());
  g1h->collection_set_iterate(&clr);
}

// The next two methods deal with the following optimisation. Some
// objects are gray by being marked and located above the finger. If
// they are copied, during an evacuation pause, below the finger then
// they need to be pushed on the stack. The observation is that, if
// there are no regions in the collection set located above the
// finger, then the above cannot happen, hence we do not need to
// explicitly gray any objects when copying them to below the
// finger. The global stack will be scanned to ensure that, if it
// points to objects being copied, it will update their
// location. There is a tricky situation with the gray objects in
// the region stack that are being copied, however. See the comment
// in newCSet().

void ConcurrentMark::newCSet() {
  if (!concurrent_marking_in_progress())
    // nothing to do if marking is not in progress
    return;

  // find what the lowest finger is among the global and local fingers
  _min_finger = _finger;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _min_finger)
      _min_finger = task_finger;
  }

  _should_gray_objects = false;

  // This fixes a very subtle and frustrating bug. It might be the case
  // that, during an evacuation pause, heap regions that contain
  // objects that are gray (by being in regions contained in the
  // region stack) are included in the collection set.
  // Since such gray objects will be moved, and because it's not easy
  // to redirect region stack entries to point to a new location
  // (because objects in one region might be scattered to multiple
  // regions after they are copied), one option is to ensure that all
  // marked objects copied during a pause are pushed on the stack.
  // Notice, however, that this problem can only happen when the
  // region stack is not empty during an evacuation pause. So, we make
  // the fix a bit less conservative and ensure that regions are
  // pushed on the stack, irrespective of whether all collection set
  // regions are below the finger, if the region stack is not empty.
  // This is expected to be a rare case, so I don't think it's
  // necessary to be smarter about it.
  if (!region_stack_empty() || has_aborted_regions())
    _should_gray_objects = true;
}

void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  if (!concurrent_marking_in_progress())
    return;

  HeapWord* region_end = hr->end();
  if (region_end > _min_finger)
    _should_gray_objects = true;
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Empty mark stack
  clear_marking_state();
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->clear_region_fields();
    _tasks[i]->clear_aborted_region();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info(" ", "init marks", _init_times);
  print_ms_time_info(" ", "remarks", _remark_times);
  {
    print_ms_time_info(" ", "final marks", _remark_mark_times);
    print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info(" ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
                         "(%8.2f s marking, %8.2f s counting).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum(),
                         cmThread()->vtime_count_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

// Closures
// XXX: there seems to be a lot of code duplication here;
// should refactor and consolidate the shared code.

// We take a break if someone is trying to stop the world.
bool ConcurrentMark::do_yield_check(int worker_i) {
  if (should_yield()) {
    if (worker_i == 0)
      _g1h->g1_policy()->record_concurrent_pause();
    cmThread()->yield();
    if (worker_i == 0)
      _g1h->g1_policy()->record_concurrent_pause_end();
    return true;
  } else {
    return false;
  }
}

bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}

bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}

bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return
    containing_card_is_marked(start) &&
    containing_card_is_marked(last);
}

#ifndef PRODUCT
// for debugging purposes
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (int i = 0; i < (int) _max_task_num; ++i) {
    gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
#endif

// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap*       _nextMarkBitMap;
  ConcurrentMark* _cm;
  CMTask*         _task;
  // true if we're scanning a heap region claimed by the task (so that
  // we move the finger along), false if we're not, i.e. currently when
  // scanning a heap region popped from the region stack (so that we
  // do not move the task finger along; it'd be a mistake if we did so).
  bool            _scanning_heap_region;

public:
  CMBitMapClosure(CMTask *task,
                  ConcurrentMark* cm,
                  CMBitMap* nextMarkBitMap)
    : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  void set_scanning_heap_region(bool scanning_heap_region) {
    _scanning_heap_region = scanning_heap_region;
  }

  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");

    if (_scanning_heap_region) {
      statsOnly( _task->increase_objs_found_on_bitmap() );
      assert(addr >= _task->finger(), "invariant");
      // We move that task's local finger along.
      _task->move_finger_to(addr);
    } else {
      // We move the task's region finger along.
      _task->move_region_finger_to(addr);
    }

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

// Closure for iterating over object fields
class CMOopClosure : public OopClosure {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMTask*          _task;

public:
  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
    assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
           "invariant");

    oop obj = oopDesc::load_decode_heap_oop(p);
    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             _task->task_id(), p, (void*) obj);
    _task->deal_with_reference(obj);
  }

  CMOopClosure(G1CollectedHeap* g1h,
               ConcurrentMark* cm,
               CMTask* task)
    : _g1h(g1h), _cm(cm), _task(task) { }
};

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
        "claim_region() should have filtered out continues humongous regions");
  assert(!hr->continuesHumongous(),
        "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
                           _task_id, hr);

  _curr_region = hr;
  _finger      = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom).
    // We then do some allocation in the region (NTAMS stays at
    // bottom), followed by the region being used as a GC alloc region
    // (NTAMS will move to top() and the objects originally below it
    // will be grayed). All objects now marked in the region are
    // explicitly grayed, if below the global finger, and in fact we
    // do not need to scan anything else. So, we simply set _finger to
    // be limit to ensure that the bitmap iteration doesn't do
    // anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region   = NULL;
  _finger        = NULL;
  _region_limit  = NULL;

  _region_finger = NULL;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] resetting", _task_id);

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();
  clear_aborted_region();

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes           = 0;
  _local_pops             = 0;
  _local_max_size         = 0;
  _objs_scanned           = 0;
  _global_pushes          = 0;
  _global_pops            = 0;
  _global_max_size        = 0;
  _global_transfers_to    = 0;
  _global_transfers_from  = 0;
  _region_stack_pops      = 0;
  _regions_claimed        = 0;
  _objs_found_on_bitmap   = 0;
  _satb_buffers_processed = 0;
  _steal_attempts         = 0;
  _steals                 = 0;
  _aborted                = 0;
  _aborted_overflow       = 0;
  _aborted_cm_aborted     = 0;
  _aborted_yield          = 0;
  _aborted_timed_out      = 0;
  _aborted_satb           = 0;
  _aborted_termination    = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

// This determines whether the method below will check both the local
// and global fingers when determining whether to push on the stack a
// gray object (value 1) or whether it will only check the global one
// (value 0). The tradeoffs are that the former will be a bit more
// accurate and possibly push less on the stack, but it might also be
// a little bit slower.
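//
// For example (an illustrative scenario, not taken from the sources):
// suppose a task's local finger is at address L inside its current
// region and the global finger is at G >= L. A newly-marked object at
// address A is pushed if A < L (the task's bitmap scan has already
// passed it), left alone if it lies in the task's current region below
// the region limit (the scan will still reach it), and otherwise
// pushed only if A < G (no future region claim will cover it).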

#define _CHECK_BOTH_FINGERS_      1

void CMTask::deal_with_reference(oop obj) {
  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
                           _task_id, (void*) obj);

  ++_refs_reached;

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
                               _task_id, (void*) obj);

      // we need to mark it first
      if (_nextMarkBitMap->parMark(objAddr)) {
        // No OrderAccess::store_load() is needed. It is implicit in the
        // CAS done in parMark(objAddr) above
        HeapWord* global_finger = _cm->finger();

#if _CHECK_BOTH_FINGERS_
        // we will check both the local and global fingers

        if (_finger != NULL && objAddr < _finger) {
          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
                                   "pushing it", _task_id, _finger);
          push(obj);
        } else if (_curr_region != NULL && objAddr < _region_limit) {
          // do nothing
        } else if (objAddr < global_finger) {
          // Notice that the global finger might be moving forward
          // concurrently. This is not a problem. In the worst case, we
          // mark the object while it is above the global finger and, by
          // the time we read the global finger, it has moved forward
          // past this object. In this case, the object will probably
          // be visited when a task is scanning the region and will also
          // be pushed on the stack. So, some duplicate work, but no
          // correctness problems.

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        } else {
          // do nothing
        }
#else // _CHECK_BOTH_FINGERS_
        // we will only check the global finger

        if (objAddr < global_finger) {
          // see long comment above

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        }
#endif // _CHECK_BOTH_FINGERS_
      }
    }
  }
}

void CMTask::push(oop obj) {
  HeapWord* objAddr = (HeapWord*) obj;
  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
         "invariant");
  assert(!_g1h->is_obj_ill(obj), "invariant");
  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");

  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);

  if (!_task_queue->push(obj)) {
    // The local task queue looks full. We need to push some entries
    // to the global stack.

    if (_cm->verbose_medium())
      gclog_or_tty->print_cr("[%d] task queue overflow, "
                             "moving entries to the global stack",
                             _task_id);
    move_entries_to_global_stack();

    // this should succeed since, even if we overflow the global
    // stack, we should have definitely removed some entries from the
    // local queue. So, there must be space on it.
    bool success = _task_queue->push(obj);
    assert(success, "invariant");
  }

  statsOnly( int tmp_size = _task_queue->size();
             if (tmp_size > _local_max_size)
               _local_max_size = tmp_size;
             ++_local_pushes );
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted())
    return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent())
    return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit)
    ++_clock_due_to_scanning;
  if (_refs_reached >= _refs_reached_limit)
    ++_clock_due_to_marking;

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_aborted_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium())
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size)
                   _global_max_size = tmp_size;
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium())
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size)
                 _local_max_size = tmp_size;
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted())
    return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
  size_t target_size;
  if (partially)
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  else
    target_size = 0;

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high())
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
             "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted())
        ret = false;
      else
        ret = _task_queue->pop_local(obj);
    }

    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted())
    return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially)
    target_size = _cm->partial_mark_stack_size_target();
  else
    target_size = 0;

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
                             _task_id, target_size);

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
  }
}

// The SATB queue set makes several assumptions about whether to call
// the par or non-par versions of its methods. This is why some of the
// code is replicated. We should really get rid of the single-threaded
// version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted())
    return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining.
  // It'd be very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, &oc);
  else
    satb_mq_set.set_closure(&oc);

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads())
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    else
      satb_mq_set.iterate_closure_all_threads();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, NULL);
  else
    satb_mq_set.set_closure(NULL);

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void CMTask::drain_region_stack(BitMapClosure* bc) {
  if (has_aborted())
    return;

  assert(_region_finger == NULL,
         "it should be NULL when we're not scanning a region");

  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                             _task_id, _cm->region_stack_size());

    MemRegion mr;

    if (!_aborted_region.is_empty()) {
      mr = _aborted_region;
      _aborted_region = MemRegion();

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
                               _task_id, mr.start(), mr.end());
    } else {
      mr = _cm->region_stack_pop_lock_free();
      // it returns MemRegion() if the pop fails
      statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
    }

    while (mr.start() != NULL) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());

      assert(mr.end() <= _cm->finger(),
             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
      if (_nextMarkBitMap->iterate(bc, mr)) {
        assert(!has_aborted(),
               "cannot abort the task without aborting the bitmap iteration");

        // We finished iterating over the region without aborting.
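        // Give the regular clock a chance to run (and possibly abort us)
        // before popping the next region off the stack.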
        regular_clock_call();
        if (has_aborted())
          mr = MemRegion();
        else {
          mr = _cm->region_stack_pop_lock_free();
          // it returns MemRegion() if the pop fails
          statsOnly( if (mr.start() != NULL) ++_region_stack_pops );
        }
      } else {
        assert(has_aborted(), "currently the only way to do so");

        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _region_finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _region_finger to something non-null.
        assert(_region_finger != NULL, "invariant");

        // Make sure that any previously aborted region has been
        // cleared.
        assert(_aborted_region.is_empty(), "aborted region not cleared");

        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        MemRegion newRegion =
          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());

        if (!newRegion.is_empty()) {
          if (_cm->verbose_low()) {
            gclog_or_tty->print_cr("[%d] recording unscanned region "
                                   "[" PTR_FORMAT ", " PTR_FORMAT ") in CMTask",
                                   _task_id,
                                   newRegion.start(), newRegion.end());
          }
          // Now record the part of the region we didn't scan to
          // make sure this task scans it later.
          _aborted_region = newRegion;
        }
        // break from while
        mr = MemRegion();
      }
      _region_finger = NULL;
    }

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
  }
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
                         _regions_claimed, _region_stack_pops);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

 The do_marking_step(time_target_ms) method is the building block
 of the parallel marking framework. It can be called in parallel
 with other invocations of do_marking_step() on different tasks
 (but only one per task, obviously) and concurrently with the
 mutator threads, or during remark; hence it eliminates the need
 for two versions of the code. When called during remark, it will
 pick up from where the task left off during the concurrent marking
 phase. Interestingly, tasks are also claimable during evacuation
 pauses, since do_marking_step() ensures that it aborts before
 it needs to yield.

 The data structures that it uses to do marking work are the
 following:

 (1) Marking Bitmap. If there are gray objects that appear only
 on the bitmap (this happens either when dealing with an overflow
 or when the initial marking phase has simply marked the roots
 and didn't push them on the stack), then tasks claim heap
 regions whose bitmap they then scan to find gray objects. A
 global finger indicates where the end of the last claimed region
 is. A local finger indicates how far into the region a task has
 scanned. The two fingers are used to determine how to gray an
 object (i.e. whether simply marking it is OK, as it will be
 visited by a task in the future, or whether it also needs to be
 pushed on a stack).

 (2) Local Queue. The local queue of the task, which is accessed
 reasonably efficiently by the task. Other tasks can steal from
 it when they run out of work. Throughout the marking phase, a
 task attempts to keep its local queue short but not totally
 empty, so that entries are available for stealing by other
 tasks. Only when there is no more work will a task totally
 drain its local queue.

 (3) Global Mark Stack. This handles local queue overflow. During
 marking, only sets of entries are moved between it and the local
 queues, as access to it requires a mutex and finer-grain
 interaction with it might cause contention. If it
 overflows, then the marking phase should restart and iterate
 over the bitmap to identify gray objects. Throughout the marking
 phase, tasks attempt to keep the global mark stack at a small
 length but not totally empty, so that entries are available for
 popping by other tasks. Only when there is no more work will
 tasks totally drain the global mark stack.

 (4) Global Region Stack. Entries on it correspond to areas of
 the bitmap that need to be scanned since they contain gray
 objects.
 Pushes on the region stack only happen during
 evacuation pauses and typically correspond to areas covered by
 GC LABs. If it overflows, then the marking phase should restart
 and iterate over the bitmap to identify gray objects. Tasks will
 try to totally drain the region stack as soon as possible.

 (5) SATB Buffer Queue. This is where completed SATB buffers are
 made available. Buffers are regularly removed from this queue
 and scanned for roots, so that the queue doesn't get too
 long. During remark, all completed buffers are processed, as
 well as the filled-in parts of any uncompleted buffers.

 The do_marking_step() method tries to abort when the time target
 has been reached. There are a few other cases when the
 do_marking_step() method also aborts:

 (1) When the marking phase has been aborted (after a Full GC).

 (2) When a global overflow (on either the global stack or the
 region stack) has been triggered. Before the task aborts, it
 will actually sync up with the other tasks to ensure that all
 the marking data structures (local queues, stacks, fingers etc.)
 are re-initialised so that when do_marking_step() completes,
 the marking phase can immediately restart.

 (3) When enough completed SATB buffers are available. The
 do_marking_step() method only tries to drain SATB buffers right
 at the beginning. So, if enough buffers are available, the
 marking step aborts and the SATB buffers are processed at
 the beginning of the next invocation.

 (4) To yield. When we have to yield, we abort and do the yield
 right at the end of do_marking_step(). This saves us a lot of
 hassle, as by yielding we might allow a Full GC. If this
 happens, then objects will be compacted underneath our feet, the
 heap might shrink, etc. We save checking for this by just
 aborting and doing the yield right at the end.

 From the above it follows that the do_marking_step() method should
 be called in a loop (or, otherwise, regularly) until it completes.

 If a marking step completes without its has_aborted() flag being
 true, it means it has completed the current marking phase (and
 also all other marking tasks have done so and have all synced up).

 A method called regular_clock_call() is invoked "regularly" (at
 sub-ms intervals) throughout marking. It is this clock method that
 checks all the abort conditions mentioned above and decides when
 the task should abort. A work-based scheme is used to trigger this
 clock method: it is called when the number of object words the
 marking phase has scanned or the number of references the marking
 phase has visited reaches a given limit. Additional invocations of
 the clock method have been planted in a few other strategic places
 too. The initial reason for the clock method was to avoid calling
 vtime too regularly, as it is quite expensive. So, once it was in
 place, it was natural to piggy-back all the other conditions on it
 too and not constantly check them throughout the code.
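
 To illustrate the calling pattern above, here is a minimal sketch
 of a driver loop (hypothetical, for illustration only; the actual
 callers also have to handle yielding, synchronisation and overflow
 around the call):

   do {
     task->do_marking_step(target_ms);
     // a real driver would yield here and check whether the whole
     // marking phase has been aborted (e.g. by a Full GC)
   } while (task->has_aborted());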

*****************************************************************************/

void CMTask::do_marking_step(double time_target_ms) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  assert(concurrent() || _cm->region_stack_empty(),
         "the region stack should have been cleared before remark");
  assert(concurrent() || !_cm->has_aborted_regions(),
         "aborted regions should have been cleared before remark");
  assert(_region_finger == NULL,
         "this should be non-null only when a region is being scanned");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_aborted_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate them
  // here, on the stack.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  CMOopClosure    oop_closure(_g1h, _cm, this);
  set_oop_closure(&oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the region stack or the mark stack overflows
    // during a GC pause and this task, after a yield point,
    // restarts. We have to abort as we need to get into the overflow
    // protocol, which happens right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Then totally drain the region stack. We will not look at
  // it again before the next invocation of this method. Entries on
  // the region stack are only added during evacuation pauses, for
  // which we have to yield. When we do, we abort the task anyway so
  // it will look at the region stack again when it restarts.
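  // Note: with scanning_heap_region set to false the closure's
  // do_bit() advances the task's _region_finger rather than its
  // _finger (compare the abort handling in drain_region_stack()
  // with the region scanning loop below).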
  bitmap_closure.set_scanning_heap_region(false);
  drain_region_stack(&bitmap_closure);
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      bitmap_closure.set_scanning_heap_region(true);
      if (mr.is_empty() ||
          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
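    // The drains below are partial, so that entries remain available
    // for other tasks to steal while we look for another region to
    // claim.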
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method about why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low())
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty, because if a thread is
    // aborting it can push a partially done region back.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt to steal work from other tasks' queues.
  if (!has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty, because if a thread is
    // aborting it can push a partially done region back.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium())
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we still haven't aborted, let's try to get into the
  // termination protocol.
  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty, because if a thread is
    // aborting it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
    } else {
      // Apparently there's more work to do. Let's abort this task;
      // our caller will then restart it and we can hopefully find
      // more things to do.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was allocated on the stack in this frame doesn't
  // escape it by accident.
  set_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_aborted_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted())
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
    }
  } else {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _oop_closure(NULL),
    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}
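
// The 0.5ms entry added to _marking_step_diffs_ms in the constructor
// above seeds the prediction that do_marking_step() subtracts from
// its time target, so even the first few steps leave a little slack;
// the exact value appears to be an empirical choice.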