/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
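  // The bitmap covers (1 << _shifter) heap words per bit, so the
  // rounding below is to that granularity (HeapWordSize << _shifter
  // bytes).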
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}


CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(MemRegion, _base);
  }
}

void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
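    // Try to claim slot 'index' by atomically bumping _index with a
    // CAS; if the CAS fails another thread raced us and we retry with
    // the updated value.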
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL)
{
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); 620 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); 621 #endif 622 623 guarantee(parallel_marking_threads() > 0, "peace of mind"); 624 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads", 625 (int) _parallel_marking_threads, false, true); 626 if (_parallel_workers == NULL) { 627 vm_exit_during_initialization("Failed necessary allocation."); 628 } else { 629 _parallel_workers->initialize_workers(); 630 } 631 } 632 633 // so that the call below can read a sensible value 634 _heap_start = (HeapWord*) rs.base(); 635 set_non_marking_state(); 636 } 637 638 void ConcurrentMark::update_g1_committed(bool force) { 639 // If concurrent marking is not in progress, then we do not need to 640 // update _heap_end. This has a subtle and important 641 // side-effect. Imagine that two evacuation pauses happen between 642 // marking completion and remark. The first one can grow the 643 // heap (hence now the finger is below the heap end). Then, the 644 // second one could unnecessarily push regions on the region 645 // stack. This causes the invariant that the region stack is empty 646 // at the beginning of remark to be false. By ensuring that we do 647 // not observe heap expansions after marking is complete, then we do 648 // not have this problem. 649 if (!concurrent_marking_in_progress() && !force) return; 650 651 MemRegion committed = _g1h->g1_committed(); 652 assert(committed.start() == _heap_start, "start shouldn't change"); 653 HeapWord* new_end = committed.end(); 654 if (new_end > _heap_end) { 655 // The heap has been expanded. 656 657 _heap_end = new_end; 658 } 659 // Notice that the heap can also shrink. However, this only happens 660 // during a Full GC (at least currently) and the entire marking 661 // phase will bail out and the task will not be restarted. So, let's 662 // do nothing. 663 } 664 665 void ConcurrentMark::reset() { 666 // Starting values for these two. This should be called in a STW 667 // phase. CM will be notified of any future g1_committed expansions 668 // will be at the end of evacuation pauses, when tasks are 669 // inactive. 670 MemRegion committed = _g1h->g1_committed(); 671 _heap_start = committed.start(); 672 _heap_end = committed.end(); 673 674 // Separated the asserts so that we know which one fires. 675 assert(_heap_start != NULL, "heap bounds should look ok"); 676 assert(_heap_end != NULL, "heap bounds should look ok"); 677 assert(_heap_start < _heap_end, "heap bounds should look ok"); 678 679 // reset all the marking data structures and any necessary flags 680 clear_marking_state(); 681 682 clear_all_count_data(); 683 684 if (verbose_low()) { 685 gclog_or_tty->print_cr("[global] resetting"); 686 } 687 688 // We do reset all of them, since different phases will use 689 // different number of active threads. So, it's easiest to have all 690 // of them ready. 691 for (int i = 0; i < (int) _max_task_num; ++i) { 692 _tasks[i]->reset(_nextMarkBitMap); 693 } 694 695 // we need this to make sure that the flag is on during the evac 696 // pause with initial mark piggy-backed 697 set_concurrent_marking_in_progress(); 698 } 699 700 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) { 701 assert(active_tasks <= _max_task_num, "we should not have more"); 702 703 _active_tasks = active_tasks; 704 // Need to update the three data structures below according to the 705 // number of active threads for this phase. 
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];

    _count_card_bitmaps[i].resize(0, false);
    FREE_C_HEAP_ARRAY(size_t, _count_marked_bytes[i]);
  }

  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
  FREE_C_HEAP_ARRAY(double, _accum_task_vtime);

  FREE_C_HEAP_ARRAY(BitMap*, _count_card_bitmaps);
  FREE_C_HEAP_ARRAY(size_t*, _count_marked_bytes);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread appears to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur, next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the number of remaining forced overflows will
  // decrease at every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that it gets suspended for a Full GC or that an
 * evacuation pause occurs. This is actually safe, since entering the
 * sync barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

void ConcurrentMark::grayRoot(oop p, int worker_i) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr)) {
    if (_nextMarkBitMap->parMark(addr)) {
      // Update the task specific count data for object p.
      add_to_count_data_for(p, worker_i);
    }
  }
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects) {
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;
  }

  HeapWord* finger = _finger;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);
  }

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low()) {
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());
    }

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low()) {
        gclog_or_tty->print_cr("[global] region stack has overflown.");
      }
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p, int worker_i) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      // Update the task specific count data for object p
      add_to_count_data_for(p, worker_i);

      if (!concurrent_marking_in_progress() || !_should_gray_objects) {
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;
      }

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low()) {
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
          }
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.
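  // _restart_for_overflow is set during remark if the global mark
  // stack overflowed; clear it before starting a new round of marking.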
  _restart_for_overflow = false;

  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
  force_overflow_conc()->init();
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_all_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* allow dirty */ true,
                       /* silent      */ false,
                       /* option      */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

// Used to calculate the # live objects per region
// for verification purposes
class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  BitMap* _region_bm;
  BitMap* _card_bm;

  size_t _tot_words_done;
  size_t _tot_live;
  size_t _tot_used;

  size_t _region_marked_bytes;

  intptr_t _bottom_card_num;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
    BitMap::idx_t last_idx = last_card_num - _bottom_card_num;

    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i), "Should already be set.");
#else
      _card_bm->par_at_put(i, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
    _tot_words_done(0), _tot_live(0), _tot_used(0),
    _region_marked_bytes(0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
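      // Walk forward from the "starts humongous" region until the
      // first region that does not continue it; [index, end_index)
      // then covers the whole humongous object.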
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) break;
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->bottom();

    assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
           "Preconditions.");

    // Record the number of words we'll examine.
    size_t words_done = (nextTop - start);

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);

    size_t marked_bytes = 0;
    _region_marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.

    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;

    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      oop obj = oop(start);
      int obj_sz = obj->size();

      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
    }

    // Handle the last range, if any.
    if (start_card_num != -1) {
      mark_card_num_range(start_card_num, last_card_num);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (nextTop < top) {
      start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
      last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);

      mark_card_num_range(start_card_num, last_card_num);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _tot_words_done = words_done;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
  size_t tot_words_done() const      { return _tot_words_done; }
  size_t tot_live() const            { return _tot_live; }
  size_t tot_used() const            { return _tot_used; }
};

// Aggregate the counting data that was constructed concurrently
// with marking.
class AddToMarkedBytesClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  size_t _task_num;
  size_t _max_task_num;

  bool _final;

public:
  AddToMarkedBytesClosure(ConcurrentMark *cm,
                          size_t task_num,
                          size_t max_task_num) :
    _cm(cm),
    _task_num(task_num),
    _max_task_num(max_task_num),
    _final(false)
  {
    assert(0 <= _task_num && _task_num < _max_task_num, "sanity");
    if ((_max_task_num - _task_num) == 1) {
      // Last task
      _final = true;
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    // Adds the value in the counted marked bytes array for
    // _task_num for region hr to the value cached in heap
    // region itself.
    // For the final task we also set the top at conc count
    // for the region.
    // The bits in the live region bitmap are set for regions
    // that contain live data during the cleanup pause.

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have their bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int hrs_index = hr->hrs_index();
    size_t* marked_bytes_array = _cm->count_marked_bytes_for(_task_num);
    size_t marked_bytes = marked_bytes_array[hrs_index];
    hr->add_to_marked_bytes(marked_bytes);

    if (_final) {
      HeapWord* ntams = hr->next_top_at_mark_start();
      HeapWord* start = hr->bottom();

      assert(start <= ntams && ntams <= hr->top() && hr->top() <= hr->end(),
             "Preconditions.");

      hr->set_top_at_conc_mark_count(ntams);
    }

    return false;
  }
};

void ConcurrentMark::aggregate_all_count_data() {
  _card_bm.clear();

  // Unions the per task card bitmaps into the global card bitmap,
  // and aggregates the per task marked bytes for each region into
  // the heap region itself.
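  // For each task: OR its card bitmap into the global card bitmap and
  // then walk the regions, adding that task's per-region marked bytes.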
  for (int i = 0; i < _max_task_num; i += 1) {
    BitMap& task_card_bm = count_card_bitmap_for(i);
    _card_bm.set_union(task_card_bm);

    // Update the marked bytes for each region
    AddToMarkedBytesClosure cl(this, i, _max_task_num);
    _g1h->heap_region_iterate(&cl);
  }

  // We're done with the accumulated per-task concurrent
  // counting data so let's clear it for the next marking.
  clear_all_count_data();
}

// Final update of count data (during cleanup).
// Adds [top_at_count, NTAMS) to the marked bytes for each
// region. Sets the bits in the card bitmap corresponding
// to the interval [top_at_count, top], and sets the
// liveness bit for each region containing live data
// in the region bitmap.

class FinalCountDataUpdateClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;

  size_t _total_live_bytes;
  size_t _total_used_bytes;
  size_t _total_words_done;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
    BitMap::idx_t last_idx = last_card_num - _bottom_card_num;

    // Inclusive bit range [start_idx, last_idx]. par_at_put_range
    // is exclusive so we have to also set the bit for last_idx.
    // Passing last_idx+1 to par_at_put_range would work in
    // most cases but could trip an OOB assertion.

    if ((last_idx - start_idx) > 0) {
      _card_bm->par_at_put_range(start_idx, last_idx, true);
    }
    _card_bm->par_set_bit(last_idx);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_set_bit((BitMap::idx_t) index);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) break;
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

public:
  FinalCountDataUpdateClosure(ConcurrentMark* cm,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
    _total_live_bytes(0), _total_used_bytes(0), _total_words_done(0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->top_at_conc_mark_count();
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    size_t words_done = ntams - hr->bottom();

    intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
    intptr_t last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);

    if (start < ntams) {
      // Region was changed between remark and cleanup pauses.
      // We need to add (ntams - start) to the marked bytes
      // for this region, and set bits for the range
      // [ card_num(start), card_num(ntams) ) in the
      // card bitmap.
      size_t live_bytes = (ntams - start) * HeapWordSize;
      hr->add_to_marked_bytes(live_bytes);

      // Record the new top at conc count
      hr->set_top_at_conc_mark_count(ntams);

      // The setting of the bits in the card bitmap takes place below
    }

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Now set the bits for [start, top]
    mark_card_num_range(start_card_num, last_card_num);

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    _total_words_done += words_done;
    _total_used_bytes += hr->used();
    _total_live_bytes += hr->next_marked_bytes();

    return false;
  }

  size_t total_words_done() const { return _total_words_done; }
  size_t total_live_bytes() const { return _total_live_bytes; }
  size_t total_used_bytes() const { return _total_used_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.
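// Any discrepancy (an expected bit that is set while the actual bit is
// clear, or expected marked bytes exceeding the actual value)
// increments a failure count that the caller can query via failures().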

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  intptr_t _bottom_card_num; // Used for calculating bitmap indices

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _cm(cm),
    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Note that the calculated count data could be a subset of the
    // count data that was accumulated during marking. See the comment
    // in G1ParCopyHelper::copy_to_survivor_space for an explanation
    // why.

    if (_verbose) {
      gclog_or_tty->print("Region %d: bottom: "PTR_FORMAT", ntams: "
                          PTR_FORMAT", top: "PTR_FORMAT", end: "PTR_FORMAT,
                          hr->hrs_index(), hr->bottom(), hr->next_top_at_mark_start(),
                          hr->top(), hr->end());
      gclog_or_tty->print_cr(", marked_bytes: calc/actual "SIZE_FORMAT"/"SIZE_FORMAT,
                             _calc_cl.region_marked_bytes(),
                             hr->next_marked_bytes());
    }

    // Verify that _top_at_conc_count == ntams
    if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %d: top at conc count incorrect: expected "
                               PTR_FORMAT", actual: "PTR_FORMAT,
                               hr->hrs_index(), hr->next_top_at_mark_start(),
                               hr->top_at_conc_mark_count());
      }
      _failures += 1;
    }

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're OK if actual marked bytes >= expected.
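    // (The actual value may legitimately exceed the expected one:
    // liveness recorded during evacuation pauses, per the
    // copy_to_survivor_space comment referenced above, need not be
    // re-derivable from the mark bitmap alone.)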
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %d: marked bytes mismatch: expected: "
                               SIZE_FORMAT", actual: "SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      _failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the expected bit is set and the actual is not set.
    BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %d: region bitmap mismatch: expected: %d, actual: %d",
                               hr->hrs_index(), expected, actual);
      }
      _failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. The set of offsets that have set bits in the expected
    // bitmap should be a subset of the offsets with set bits from the actual
    // calculated card bitmap.
    // Again it's more important that if the expected bit is set then the
    // actual bit be set.
    intptr_t start_card_num =
      intptr_t(uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift);
    intptr_t top_card_num =
      intptr_t(uintptr_t(hr->top()) >> CardTableModRefBS::card_shift);

    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
    BitMap::idx_t end_idx = top_card_num - _bottom_card_num;

    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %d: card bitmap mismatch at idx %d: expected: %d, actual: %d",
                                 hr->hrs_index(), i, expected, actual);
        }
        _failures += 1;
      }
    }
    if (_failures) {
      // Stop the iteration: returning true from doHeapRegion()
      // terminates it, and one failing region is enough.
      return true;
    }

    return false;
  }
};

class Mux2HRClosure: public HeapRegionClosure {
  HeapRegionClosure* _cl1;
  HeapRegionClosure* _cl2;

public:
  Mux2HRClosure(HeapRegionClosure *c1, HeapRegionClosure *c2) : _cl1(c1), _cl2(c2) { }
  bool doHeapRegion(HeapRegion* hr) {
    bool res1 = _cl1->doHeapRegion(hr);
    bool res2 = _cl2->doHeapRegion(hr);

    // Only continue the iteration if both closures returned false.
    return res1 || res2;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;

  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  BitMap _expected_region_bm;
  BitMap _expected_card_bm;

  int _failures;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _bm(bm),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(0, false), _expected_card_bm(0, false),
      _failures(0)
  {
    if (ParallelGCThreads > 0) {
      _n_workers = _g1h->workers()->total_workers();
    } else {
      _n_workers = 1;
    }

    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);

    if (VerifyDuringGC) {
      _expected_card_bm.resize(_actual_card_bm->size(), false);
      _expected_region_bm.resize(_actual_region_bm->size(), false);
    }
  }

  ~G1ParFinalCountTask() {
    if (VerifyDuringGC) {
      _expected_region_bm.resize(0);
      _expected_card_bm.resize(0);
    }
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {

    FinalCountDataUpdateClosure final_update_cl(_g1h->concurrent_mark(),
                                                _actual_region_bm, _actual_card_bm);

    VerifyLiveObjectDataHRClosure verify_cl(_g1h->concurrent_mark(),
                                            _actual_region_bm, _actual_card_bm,
                                            &_expected_region_bm,
                                            &_expected_card_bm,
                                            true /* verbose */);

    Mux2HRClosure update_and_verify_cl(&final_update_cl, &verify_cl);

    HeapRegionClosure* hr_cl = &final_update_cl;
    if (VerifyDuringGC) {
      hr_cl = &update_and_verify_cl;
    }

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(hr_cl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(hr_cl);
    }

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = final_update_cl.total_live_bytes();
    _used_bytes[i] = final_update_cl.total_used_bytes();

    if (VerifyDuringGC) {
      _failures += verify_cl.failures();
    }
  }

  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }

  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }

  int failures() const { return _failures; }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task);
  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(int i) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing, we cannot guarantee that we will
      // only generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.
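      // Note that we are still holding ParGCRareEvent_lock here, so
      // the printing and the append to the global cleanup list below
      // are serialized across the worker threads.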

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           int worker_num,
                           FreeRegionList* local_cleanup_list,
                           HumongousRegionSet* humongous_proxy_set,
                           HRRSCleanupTask* hrrs_cleanup_task)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task),
    _claimed_region_time(0.0), _max_region_time(0.0) { }

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  hr->reset_gc_time_stamp();
  if (!hr->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
    _g1->free_region_if_empty(hr,
                              &_freed_bytes,
                              _local_cleanup_list,
                              _humongous_proxy_set,
                              _hrrs_cleanup_task,
                              true /* par */);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) {
      _max_region_time = region_time;
    }
  }
  return false;
}

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
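  // (has_aborted() indicates that a full collection aborted the
  // concurrent cycle; in that case we only record that marking is
  // complete, so that the bitmap clearing logic is not confused, and
  // skip the cleanup work below.)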
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  // Clear the global region bitmap - it will be filled as part
  // of the final counting task.
  _region_bm.clear();

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_count_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    g1_par_count_task.work(0);
  }

  // Verify that there were no verification failures of
  // the live counting data.
  if (VerifyDuringGC) {
    assert(g1_par_count_task.failures() == 0, "Unexpected failures");
  }

  size_t known_garbage_bytes =
    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  g1p->set_known_garbage_bytes(known_garbage_bytes);

  size_t start_used_bytes = g1h->used();
  _at_least_one_mark_complete = true;
  g1h->set_marking_complete();

  ergo_verbose4(ErgoConcCycles,
                "finish cleanup",
                ergo_format_byte("occupancy")
                ergo_format_byte("capacity")
                ergo_format_byte_perc("known garbage"),
                start_used_bytes, g1h->capacity(),
                known_garbage_bytes,
                ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("Cleanup:");
    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
                           this_final_counting_time*1000.0);
  }
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install the newly created mark bitmap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
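  // In the note-end phase each worker claims regions, calls
  // note_end_of_marking() on them, and frees any region that marking
  // has shown to be completely empty (see
  // G1NoteEndOfConcMarkClosure::doHeapRegion() above).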
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }
  double note_end_end = os::elapsedTime();
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
                           (note_end_end - note_end_start)*1000.0);
  }

  // The remembered set scrubbing has to happen before the
  // record_concurrent_mark_cleanup_end() call below, since it affects
  // the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      int n_workers = g1h->workers()->total_workers();
      g1h->set_par_threads(n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end(
                        g1_par_note_end_task.freed_bytes(),
                        g1_par_note_end_task.max_live_bytes());

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // G1CollectedHeap::heap()->print();
  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  //                        G1CollectedHeap::heap()->get_gc_time_stamp());

  if (PrintGC || PrintGCDetails) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  g1h->verify_region_sets_optional();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has "SIZE_FORMAT" entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending "SIZE_FORMAT" entries to the "
                               "secondary_free_list, cleanup list still has "
                               SIZE_FORMAT" entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Support closures for reference processing in G1

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

class G1CMKeepAliveClosure: public OopClosure {
  G1CollectedHeap* _g1;
  ConcurrentMark* _cm;
  CMBitMap* _bitMap;
public:
  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                       CMBitMap* bitMap) :
    _g1(g1), _cm(cm),
    _bitMap(bitMap)
  {
    assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    HeapWord* addr = (HeapWord*)obj;

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("\t[0] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             p, (void*) obj);
    }

    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
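      // The object is in the heap and not yet known to be live: mark
      // it, update this closure's (VM thread) counting data, and push
      // the object on the global mark stack for later scanning.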
      _bitMap->mark(addr);
      // Update the task specific count data for obj
      _cm->add_to_count_data_for(obj, 0 /* worker_i */);

      _cm->mark_stack_push(obj);
    }
  }
};

class G1CMDrainMarkingStackClosure: public VoidClosure {
  CMMarkStack* _markStack;
  CMBitMap* _bitMap;
  G1CMKeepAliveClosure* _oopClosure;
public:
  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
                               G1CMKeepAliveClosure* oopClosure) :
    _markStack(markStack),
    _bitMap(bitMap),
    _oopClosure(oopClosure)
  {}

  void do_void() {
    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  }
};

// 'Keep Alive' closure used by parallel reference processing.
// An instance of this closure is used in the parallel reference processing
// code rather than an instance of G1CMKeepAliveClosure. We could have used
// the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
// only placed on the discovered ref lists once, so we can mark and push with
// no need to check whether the object has already been marked. Using the
// G1CMKeepAliveClosure would mean, however, having all the worker threads
// operating on the global mark stack. This means that an individual
// worker would be doing lock-free pushes while it processes its own
// discovered ref list followed by a drain call. If the discovered ref lists
// are unbalanced then this could cause interference with the other
// workers. Using a CMTask (and its embedded local data structures)
// avoids that potential interference.
class G1CMParKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  CMBitMap* _bitMap;
  int _ref_counter_limit;
  int _ref_counter;
public:
  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
                                  CMTask* task,
                                  CMBitMap* bitMap) :
    _cm(cm), _task(task), _bitMap(bitMap),
    _ref_counter_limit(G1RefProcDrainInterval)
  {
    assert(_ref_counter_limit > 0, "sanity");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->task_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them and objects
        // reachable from them on to the local stack (and possibly the global stack).
        // Call do_marking_step() to process these entries. We call the routine in a
        // loop, which we'll exit if there's nothing more to do (i.e. we're done
        // with the entries that we've pushed as a result of the deal_with_reference
        // calls above) or we overflow.
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
        // while there may still be some work to do. (See the comment at the
        // beginning of CMTask::do_marking_step() for those conditions - one of which
        // is reaching the specified time target.) It is only when
        // CMTask::do_marking_step() returns without setting the has_aborted() flag
        // that the marking has completed.
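        // The loop below therefore keeps calling do_marking_step()
        // until either it returns without having aborted (all pushed
        // entries have been processed) or the global mark stack has
        // overflown.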
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_stealing    */,
                                 false /* do_termination */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
      }
    }
  }
};

class G1CMParDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
public:
  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task)
  {}

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step",
                               _task->task_id());
      }

      // We call CMTask::do_marking_step() to completely drain the local and
      // global marking stacks. The routine is called in a loop, which we'll
      // exit if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the
      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
      // lists above) or we overflow the global marking stack.
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
      // while there may still be some work to do. (See the comment at the
      // beginning of CMTask::do_marking_step() for those conditions - one of which
      // is reaching the specified time target.) It is only when
      // CMTask::do_marking_step() returns without setting the has_aborted() flag
      // that the marking has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_stealing    */,
                             true /* do_termination */);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitmap;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          CMBitMap* bitmap,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm), _bitmap(bitmap),
    _workers(workers), _active_workers(n_workers)
  { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitmap;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm,
                       CMBitMap* bitmap) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
  {}

  virtual void work(int i) {
    CMTask* marking_task = _cm->task(i);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);

    _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);

  // We need to reset the phase for each task execution so that
  // the termination protocol of CMTask::do_marking_step works.
  _cm->set_phase(_active_workers, false /* concurrent */);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task)
  { }

  virtual void work(int i) {
    _enq_task.work(i);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    bool verbose = PrintGC && PrintGCDetails;
    if (verbose) {
      gclog_or_tty->put(' ');
    }
    TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Process weak references.
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
    G1CMDrainMarkingStackClosure
      g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);

    // We use the work gang from the G1CollectedHeap and we utilize all
    // the worker threads.
    int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
    active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);

    G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
                                              g1h->workers(), active_workers);

    if (rp->processing_is_mt()) {
      // Set the degree of MT here. If the discovery is done MT, there
      // may have been a different number of threads doing the discovery
      // and a different number of discovered lists may have Ref objects.
      // That is OK as long as the Reference lists are balanced (see
      // balance_all_queues() and balance_queues()).
      rp->set_active_mt_degree(active_workers);

      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        &par_task_executor);

      // The work routines of the parallel keep_alive and drain_marking_stack
      // will set the has_overflown flag if we overflow the global marking
      // stack.
    } else {
      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        NULL);
    }

    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");
    if (_markStack.overflow()) {
      // Should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    if (rp->processing_is_mt()) {
      assert(rp->num_q() == active_workers, "why not");
      rp->enqueue_discovered_references(&par_task_executor);
    } else {
      rp->enqueue_discovered_references();
    }

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark *_cm;

public:
  void work(int worker_i) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if ((size_t)worker_i < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_i);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing    */,
                              true /* do_termination */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm) :
    AbstractGangTask("Par Remark"), _cm(cm) { }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = ParallelGCThreads;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption  vo,
                           bool          all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = false;
      bool marked = false;

      switch (_vo) {
        case VerifyOption_G1UsePrevMarking:
          over_tams = hr->obj_allocated_since_prev_marking(obj);
          marked = _g1h->isMarkedPrev(obj);
          break;
        case VerifyOption_G1UseNextMarking:
          over_tams = hr->obj_allocated_since_next_marking(obj);
          marked = _g1h->isMarkedNext(obj);
          break;
        case VerifyOption_G1UseMarkWord:
          marked = obj->is_gc_marked();
          break;
        default:
          ShouldNotReachHere();
      }

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;
  HeapRegion*      _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all,
                              HeapRegion*   hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = false;
    bool marked = false;

    switch (_vo) {
      case VerifyOption_G1UsePrevMarking:
        over_tams = _hr->obj_allocated_since_prev_marking(o);
        marked = _g1h->isMarkedPrev(o);
        break;
      case VerifyOption_G1UseNextMarking:
        over_tams = _hr->obj_allocated_since_next_marking(o);
        marked = _g1h->isMarkedNext(o);
        break;
      case VerifyOption_G1UseMarkWord:
        marked = o->is_gc_marked();
        break;
      default:
        ShouldNotReachHere();
    }
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ? " M" : "");
      PrintReachableOopClosure oopCl(_out, _vo, _all);
      o->oop_iterate(&oopCl);
    }
  }
};

class PrintReachableRegionClosure : public HeapRegionClosure {
private:
  outputStream* _out;
  VerifyOption  _vo;
  bool          _all;

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    HeapWord* p = NULL;

    switch (_vo) {
      case VerifyOption_G1UsePrevMarking:
        p = hr->prev_top_at_mark_start();
        break;
      case VerifyOption_G1UseNextMarking:
        p = hr->next_top_at_mark_start();
        break;
      case VerifyOption_G1UseMarkWord:
        // When we are verifying marking using the mark word, TAMS has
        // no relevance.
        assert(p == NULL, "post-condition");
        break;
      default:
        ShouldNotReachHere();
    }
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "TAMS: "PTR_FORMAT, b, e, t, p);
    _out->cr();

    HeapWord* from = b;
    HeapWord* to   = t;

    if (to > from) {
      _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
      _out->cr();
      PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
      hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
      _out->cr();
    }

    return false;
  }

  PrintReachableRegionClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all) :
    _out(out), _vo(vo), _all(all) { }
};

static const char* verify_option_to_tams(VerifyOption vo) {
  switch (vo) {
    case VerifyOption_G1UsePrevMarking:
      return "PTAMS";
    case VerifyOption_G1UseNextMarking:
      return "NTAMS";
    default:
      return "NONE";
  }
}

void ConcurrentMark::print_reachable(const char* str,
                                     VerifyOption vo,
                                     bool all) {
  gclog_or_tty->cr();
  gclog_or_tty->print_cr("== Doing heap dump... ");
"); 2961 2962 if (G1PrintReachableBaseFile == NULL) { 2963 gclog_or_tty->print_cr(" #### error: no base file defined"); 2964 return; 2965 } 2966 2967 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2968 (JVM_MAXPATHLEN - 1)) { 2969 gclog_or_tty->print_cr(" #### error: file name too long"); 2970 return; 2971 } 2972 2973 char file_name[JVM_MAXPATHLEN]; 2974 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2975 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2976 2977 fileStream fout(file_name); 2978 if (!fout.is_open()) { 2979 gclog_or_tty->print_cr(" #### error: could not open file"); 2980 return; 2981 } 2982 2983 outputStream* out = &fout; 2984 out->print_cr("-- USING %s", verify_option_to_tams(vo)); 2985 out->cr(); 2986 2987 out->print_cr("--- ITERATING OVER REGIONS"); 2988 out->cr(); 2989 PrintReachableRegionClosure rcl(out, vo, all); 2990 _g1h->heap_region_iterate(&rcl); 2991 out->cr(); 2992 2993 gclog_or_tty->print_cr(" done"); 2994 gclog_or_tty->flush(); 2995 } 2996 2997 #endif // PRODUCT 2998 2999 // This note is for drainAllSATBBuffers and the code in between. 3000 // In the future we could reuse a task to do this work during an 3001 // evacuation pause (since now tasks are not active and can be claimed 3002 // during an evacuation pause). This was a late change to the code and 3003 // is currently not being taken advantage of. 3004 3005 class CMGlobalObjectClosure : public ObjectClosure { 3006 private: 3007 ConcurrentMark* _cm; 3008 3009 public: 3010 void do_object(oop obj) { 3011 _cm->deal_with_reference(obj); 3012 } 3013 3014 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } 3015 }; 3016 3017 void ConcurrentMark::deal_with_reference(oop obj) { 3018 if (verbose_high()) { 3019 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, 3020 (void*) obj); 3021 } 3022 3023 HeapWord* objAddr = (HeapWord*) obj; 3024 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error"); 3025 if (_g1h->is_in_g1_reserved(objAddr)) { 3026 assert(obj != NULL, "null check is implicit"); 3027 if (!_nextMarkBitMap->isMarked(objAddr)) { 3028 // Only get the containing region if the object is not marked on the 3029 // bitmap (otherwise, it's a waste of time since we won't do 3030 // anything with it). 3031 HeapRegion* hr = _g1h->heap_region_containing_raw(obj); 3032 if (!hr->obj_allocated_since_next_marking(obj)) { 3033 if (verbose_high()) { 3034 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " 3035 "marked", (void*) obj); 3036 } 3037 3038 // we need to mark it first 3039 if (_nextMarkBitMap->parMark(objAddr)) { 3040 // Update the task specific count data for obj 3041 add_to_count_data_for(obj, hr, 0 /* worker_i */); 3042 3043 // No OrderAccess:store_load() is needed. 
          // CAS done in parMark(objAddr) above
          HeapWord* finger = _finger;
          if (objAddr < finger) {
            if (verbose_high()) {
              gclog_or_tty->print_cr("[global] below the global finger "
                                     "("PTR_FORMAT"), pushing it", finger);
            }
            if (!mark_stack_push(obj)) {
              if (verbose_low()) {
                gclog_or_tty->print_cr("[global] global stack overflow during "
                                       "deal_with_reference");
              }
            }
          }
        }
      }
    }
  }
}

void ConcurrentMark::drainAllSATBBuffers() {
  CMGlobalObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_closure(&oc);

  while (satb_mq_set.apply_closure_to_completed_buffer()) {
    if (verbose_medium()) {
      gclog_or_tty->print_cr("[global] processed an SATB buffer");
    }
  }

  // no need to check whether we should do this, as this is only
  // called during an evacuation pause
  satb_mq_set.iterate_closure_all_threads();

  satb_mq_set.set_closure(NULL);
  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
}

void ConcurrentMark::markPrev(oop p) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
}

void ConcurrentMark::clear(oop p) {
  assert(p != NULL && p->is_oop(), "expected an oop");
  HeapWord* addr = (HeapWord*)p;
  assert(addr >= _nextMarkBitMap->startWord() &&
         addr < _nextMarkBitMap->endWord(), "in a region");

  _nextMarkBitMap->clear(addr);
}

void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  _nextMarkBitMap->clearRange(mr);
}

HeapRegion*
ConcurrentMark::claim_region(int task_num) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // Note on how this code handles humongous regions. In the
    // normal case the finger will reach the start of a "starts
    // humongous" (SH) region. Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b).
    // So, the race between claim_region() and a humongous object
    // allocation might force us to do a bit of unnecessary work (due
    // to some unnecessary bitmap iterations) but it should not
    // introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord* bottom = curr_region->bottom();
    HeapWord* end = curr_region->end();
    HeapWord* limit = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               task_num, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  bool result = false;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        the_task->set_aborted_region(MemRegion());
        result = true;
      }
    }
  }
  return result;
}

bool ConcurrentMark::has_aborted_regions() {
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return true;
    }
  }
  return false;
}

void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_markStack.size() > 0 && verbose_low()) {
    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
                           "size = %d", _markStack.size());
  }
  // we first iterate over the contents of the mark stack...
  _markStack.oops_do(cl);

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue((int)i);

    if (queue->size() > 0 && verbose_low()) {
      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
                             "size = %d", i, queue->size());
    }

    // ...then over the contents of all the task queues.
    queue->oops_do(cl);
  }

  // Invalidate any entries in the region stack that point into the
  // collection set
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

  // Invalidate any aborted regions, recorded in the individual CM
  // tasks, that point into the collection set.
  if (invalidate_aborted_regions_in_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }
}

void ConcurrentMark::clear_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  _regionStack.setEmpty();
  _regionStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
    // Clear any partial regions from the CMTasks
    _tasks[i]->clear_aborted_region();
  }
}

// Clear the per-worker arrays used to store the per-region counting data
void ConcurrentMark::clear_all_count_data() {
  assert(SafepointSynchronize::is_at_safepoint() ||
         !Universe::is_fully_initialized(), "must be");

  int max_regions = _g1h->max_regions();

  assert(_max_task_num != 0, "uninitialized");
  assert(_count_card_bitmaps != NULL, "uninitialized");
  assert(_count_marked_bytes != NULL, "uninitialized");

  for (int i = 0; i < _max_task_num; i += 1) {
    BitMap& task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_for(i);

    assert(task_card_bm.size() == _card_bm.size(), "size mismatch");
    assert(marked_bytes_array != NULL, "uninitialized");

    for (int j = 0; j < max_regions; j++) {
      marked_bytes_array[j] = 0;
    }
    task_card_bm.clear();
  }
}

// Adds the given region to the counting data structures
// for the given task id.
void ConcurrentMark::add_to_count_data_for(MemRegion mr,
                                           HeapRegion* hr,
                                           int worker_i) {
  G1CollectedHeap* g1h = _g1h;
  HeapWord* start = mr.start();
  HeapWord* last = mr.last();
  size_t index = hr->hrs_index();

  assert(!hr->continuesHumongous(), "should not be HC region");
  assert(hr == g1h->heap_region_containing(start), "sanity");
  assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
  assert(0 <= worker_i && worker_i < _max_task_num, "oob");

  BitMap& task_card_bm = count_card_bitmap_for(worker_i);
  size_t* marked_bytes_array = count_marked_bytes_for(worker_i);

  // Below, the term "card num" means the result of shifting an address
  // by the card shift -- address 0 corresponds to card number 0. One
  // must subtract the card num of the bottom of the heap to obtain a
  // card table index.

  intptr_t start_card_num =
    intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  intptr_t last_card_num =
    intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);

  intptr_t bottom_card_num =
    intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  BitMap::idx_t start_idx = start_card_num - bottom_card_num;
  BitMap::idx_t last_idx = last_card_num - bottom_card_num;

  // The card bitmap is task/worker specific => no need to use 'par' routines.
  // Inclusive bit range [start_idx, last_idx]. set_range is exclusive
  // so we have to also explicitly set the bit for last_idx.
  // Passing last_idx+1 to set_range would work in most cases
  // but could trip an OOB assertion.

  if ((last_idx - start_idx) > 0) {
    task_card_bm.set_range(start_idx, last_idx);
  }
  task_card_bm.set_bit(last_idx);

  // Add to the task local marked bytes for this region.
  marked_bytes_array[index] += mr.byte_size();
}

void ConcurrentMark::add_to_count_data_for(oop obj, HeapRegion* hr, int worker_i) {
  MemRegion mr((HeapWord*)obj, obj->size());
  add_to_count_data_for(mr, hr, worker_i);
}

void ConcurrentMark::add_to_count_data_for(MemRegion mr, int worker_i) {
  HeapRegion* hr = _g1h->heap_region_containing(mr.start());
  add_to_count_data_for(mr, hr, worker_i);
}

void ConcurrentMark::add_to_count_data_for(oop obj, int worker_i) {
  MemRegion mr((HeapWord*)obj, obj->size());
  add_to_count_data_for(mr, worker_i);
}

// Updates the counting data with liveness info recorded for a
// region (typically a GCLab).
void ConcurrentMark::add_to_count_data_for_region(MemRegion lab_mr,
                                                  BitMap* lab_card_bm,
                                                  intptr_t lab_bottom_card_num,
                                                  size_t lab_marked_bytes,
                                                  int worker_i) {
  HeapRegion* hr = _g1h->heap_region_containing(lab_mr.start());

  BitMap& task_card_bm = count_card_bitmap_for(worker_i);
  size_t* marked_bytes_array = count_marked_bytes_for(worker_i);

  // Below, the term "card num" means the result of shifting an address
  // by the card shift -- address 0 corresponds to card number 0. One
  // must subtract the card num of the bottom of the heap to obtain a
  // card table index.

  intptr_t heap_bottom_card_num =
    intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  assert(intptr_t(uintptr_t(lab_mr.start()) >> CardTableModRefBS::card_shift) == lab_bottom_card_num,
         "sanity");

  // We have to map the indices of set bits in lab_card_bm, using
  // lab_bottom_card_num, to indices into the card bitmap for the given
  // task.
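  // Illustrative example (assuming 512-byte cards, i.e.
  // CardTableModRefBS::card_shift == 9): if lab_bottom_card_num is
  // 0x400010 and heap_bottom_card_num is 0x400000, then a set bit at
  // lab bitmap index 3 denotes card num 0x400013, which maps to index
  // 0x13 (19) in this worker's card bitmap.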
3405 3406 BitMap::idx_t end_idx = lab_card_bm->size(); 3407 BitMap::idx_t start_idx = lab_card_bm->get_next_one_offset(0, end_idx); 3408 while (start_idx < end_idx) { 3409 assert(lab_card_bm->at(start_idx), "should be set"); 3410 3411 intptr_t lab_card_num = lab_bottom_card_num + start_idx; 3412 BitMap::idx_t card_bm_idx = lab_card_num - heap_bottom_card_num; 3413 3414 task_card_bm.set_bit(card_bm_idx); 3415 3416 // Get the offset of the next set bit 3417 start_idx = lab_card_bm->get_next_one_offset(start_idx+1, end_idx); 3418 } 3419 3420 // Now add to the marked bytes 3421 marked_bytes_array[hr->hrs_index()] += lab_marked_bytes; 3422 } 3423 3424 void ConcurrentMark::clear_count_data_for_heap_region(HeapRegion* hr) { 3425 // Clears the count data for the given region from _all_ of 3426 // the per-task counting data structures. 3427 3428 MemRegion used_region = hr->used_region(); 3429 HeapWord* start = used_region.start(); 3430 HeapWord* last = used_region.last(); 3431 size_t hr_index = hr->hrs_index(); 3432 3433 intptr_t bottom_card_num = 3434 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> 3435 CardTableModRefBS::card_shift); 3436 3437 intptr_t start_card_num = 3438 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); 3439 intptr_t last_card_num = 3440 intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift); 3441 3442 BitMap::idx_t start_idx = start_card_num - bottom_card_num; 3443 BitMap::idx_t last_idx = last_card_num - bottom_card_num; 3444 3445 size_t used_region_bytes = used_region.byte_size(); 3446 size_t marked_bytes = 0; 3447 3448 for (int i=0; i < _max_task_num; i += 1) { 3449 BitMap& task_card_bm = count_card_bitmap_for(i); 3450 size_t* marked_bytes_array = count_marked_bytes_for(i); 3451 3452 marked_bytes += marked_bytes_array[hr_index]; 3453 // clear the amount of marked bytes in the task array for this 3454 // region 3455 marked_bytes_array[hr_index] = 0; 3456 3457 // Clear the inclusive range [start_idx, last_idx] from the 3458 // card bitmap. The clear_range routine is exclusive so we 3459 // need to also explicitly clear the bit at last_idx. 3460 // Passing last_idx+1 to the clear_range would work in 3461 // most cases but could trip an OOB assertion. 
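// A worked example of the inclusive/exclusive convention described
// above: with start_idx == 4 and last_idx == 7, clear_range(4, 7)
// clears bits [4, 7) and the explicit clear_bit(7) clears the final
// bit of the inclusive range.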
3462 3463 if ((last_idx - start_idx) > 0) { 3464 task_card_bm.clear_range(start_idx, last_idx); 3465 } 3466 task_card_bm.clear_bit(last_idx); 3467 } 3468 // We could assert here that marked_bytes == used_region_bytes 3469 } 3470 3471 void ConcurrentMark::print_stats() { 3472 if (verbose_stats()) { 3473 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3474 for (size_t i = 0; i < _active_tasks; ++i) { 3475 _tasks[i]->print_stats(); 3476 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3477 } 3478 } 3479 } 3480 3481 class CSMarkOopClosure: public OopClosure { 3482 friend class CSMarkBitMapClosure; 3483 3484 G1CollectedHeap* _g1h; 3485 CMBitMap* _bm; 3486 ConcurrentMark* _cm; 3487 oop* _ms; 3488 jint* _array_ind_stack; 3489 int _ms_size; 3490 int _ms_ind; 3491 int _array_increment; 3492 3493 bool push(oop obj, int arr_ind = 0) { 3494 if (_ms_ind == _ms_size) { 3495 gclog_or_tty->print_cr("Mark stack is full."); 3496 return false; 3497 } 3498 _ms[_ms_ind] = obj; 3499 if (obj->is_objArray()) { 3500 _array_ind_stack[_ms_ind] = arr_ind; 3501 } 3502 _ms_ind++; 3503 return true; 3504 } 3505 3506 oop pop() { 3507 if (_ms_ind == 0) { 3508 return NULL; 3509 } else { 3510 _ms_ind--; 3511 return _ms[_ms_ind]; 3512 } 3513 } 3514 3515 template <class T> bool drain() { 3516 while (_ms_ind > 0) { 3517 oop obj = pop(); 3518 assert(obj != NULL, "Since index was non-zero."); 3519 if (obj->is_objArray()) { 3520 jint arr_ind = _array_ind_stack[_ms_ind]; 3521 objArrayOop aobj = objArrayOop(obj); 3522 jint len = aobj->length(); 3523 jint next_arr_ind = arr_ind + _array_increment; 3524 if (next_arr_ind < len) { 3525 push(obj, next_arr_ind); 3526 } 3527 // Now process this portion of the array. 3528 int lim = MIN2(next_arr_ind, len); 3529 for (int j = arr_ind; j < lim; j++) { 3530 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j)); 3531 } 3532 3533 } else { 3534 obj->oop_iterate(this); 3535 } 3536 if (abort()) return false; 3537 } 3538 return true; 3539 } 3540 3541 public: 3542 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : 3543 _g1h(G1CollectedHeap::heap()), 3544 _bm(cm->nextMarkBitMap()), 3545 _cm(cm), 3546 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), 3547 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), 3548 _ms_size(ms_size), _ms_ind(0), 3549 _array_increment(MAX2(ms_size/8, 16)) 3550 {} 3551 3552 ~CSMarkOopClosure() { 3553 FREE_C_HEAP_ARRAY(oop, _ms); 3554 FREE_C_HEAP_ARRAY(jint, _array_ind_stack); 3555 } 3556 3557 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 3558 virtual void do_oop( oop* p) { do_oop_work(p); } 3559 3560 template <class T> void do_oop_work(T* p) { 3561 T heap_oop = oopDesc::load_heap_oop(p); 3562 if (oopDesc::is_null(heap_oop)) return; 3563 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 3564 if (obj->is_forwarded()) { 3565 // If the object has already been forwarded, we have to make sure 3566 // that it's marked. So follow the forwarding pointer. Note that 3567 // this does the right thing for self-forwarding pointers in the 3568 // evacuation failure case.
obj = obj->forwardee(); 3570 } 3571 HeapRegion* hr = _g1h->heap_region_containing(obj); 3572 if (hr != NULL) { 3573 if (hr->in_collection_set()) { 3574 if (_g1h->is_obj_ill(obj)) { 3575 _bm->mark((HeapWord*)obj); 3576 // Update the task specific count data for the object 3577 _cm->add_to_count_data_for(obj, hr, 0 /* worker_i */); 3578 3579 if (!push(obj)) { 3580 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); 3581 set_abort(); 3582 } 3583 } 3584 } else { 3585 // Outside the collection set; we need to gray it 3586 _cm->deal_with_reference(obj); 3587 } 3588 } 3589 } 3590 }; 3591 3592 class CSMarkBitMapClosure: public BitMapClosure { 3593 G1CollectedHeap* _g1h; 3594 CMBitMap* _bitMap; 3595 ConcurrentMark* _cm; 3596 CSMarkOopClosure _oop_cl; 3597 public: 3598 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : 3599 _g1h(G1CollectedHeap::heap()), 3600 _bitMap(cm->nextMarkBitMap()), 3601 _oop_cl(cm, ms_size) 3602 {} 3603 3604 ~CSMarkBitMapClosure() {} 3605 3606 bool do_bit(size_t offset) { 3607 // convert offset into a HeapWord* 3608 HeapWord* addr = _bitMap->offsetToHeapWord(offset); 3609 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(), 3610 "address out of range"); 3611 assert(_bitMap->isMarked(addr), "tautology"); 3612 oop obj = oop(addr); 3613 if (!obj->is_forwarded()) { 3614 if (!_oop_cl.push(obj)) return false; 3615 if (UseCompressedOops) { 3616 if (!_oop_cl.drain<narrowOop>()) return false; 3617 } else { 3618 if (!_oop_cl.drain<oop>()) return false; 3619 } 3620 } 3621 // Otherwise... 3622 return true; 3623 } 3624 }; 3625 3626 3627 class CompleteMarkingInCSHRClosure: public HeapRegionClosure { 3628 CMBitMap* _bm; 3629 CSMarkBitMapClosure _bit_cl; 3630 enum SomePrivateConstants { 3631 MSSize = 1000 3632 }; 3633 bool _completed; 3634 public: 3635 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : 3636 _bm(cm->nextMarkBitMap()), 3637 _bit_cl(cm, MSSize), 3638 _completed(true) 3639 {} 3640 3641 ~CompleteMarkingInCSHRClosure() {} 3642 3643 bool doHeapRegion(HeapRegion* r) { 3644 if (!r->evacuation_failed()) { 3645 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); 3646 if (!mr.is_empty()) { 3647 if (!_bm->iterate(&_bit_cl, mr)) { 3648 _completed = false; 3649 return true; 3650 } 3651 } 3652 } 3653 return false; 3654 } 3655 3656 bool completed() { return _completed; } 3657 }; 3658 3659 class ClearMarksInHRClosure: public HeapRegionClosure { 3660 ConcurrentMark* _cm; 3661 CMBitMap* _bm; 3662 public: 3663 ClearMarksInHRClosure(ConcurrentMark* cm, CMBitMap* bm): 3664 _cm(cm), _bm(bm) 3665 { } 3666 3667 bool doHeapRegion(HeapRegion* r) { 3668 if (!r->used_region().is_empty() && !r->evacuation_failed()) { 3669 _bm->clearRange(r->used_region()); 3670 // Need to remove values from the count info 3671 _cm->clear_count_data_for_heap_region(r); 3672 } 3673 return false; 3674 } 3675 }; 3676 3677 void ConcurrentMark::complete_marking_in_collection_set() { 3678 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 3679 3680 if (!g1h->mark_in_progress()) { 3681 g1h->g1_policy()->record_mark_closure_time(0.0); 3682 return; 3683 } 3684 3685 int i = 1; 3686 double start = os::elapsedTime(); 3687 while (true) { 3688 i++; 3689 CompleteMarkingInCSHRClosure cmplt(this); 3690 g1h->collection_set_iterate(&cmplt); 3691 if (cmplt.completed()) break; 3692 } 3693 double end_time = os::elapsedTime(); 3694 double elapsed_time_ms = (end_time - start) * 1000.0; 3695 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms); 3696 3697 ClearMarksInHRClosure clr(this,
nextMarkBitMap()); 3698 g1h->collection_set_iterate(&clr); 3699 } 3700 3701 // The next two methods deal with the following optimisation. Some 3702 // objects are gray by being marked and located above the finger. If 3703 // they are copied, during an evacuation pause, below the finger then 3704 // they need to be pushed on the stack. The observation is that, if 3705 // there are no regions in the collection set located above the 3706 // finger, then the above cannot happen, hence we do not need to 3707 // explicitly gray any objects when copying them to below the 3708 // finger. The global stack will be scanned to ensure that, if it 3709 // points to objects being copied, it will update their 3710 // location. There is a tricky situation with the gray objects in 3711 // the region stack that are being copied, however. See the comment in 3712 // newCSet(). 3713 3714 void ConcurrentMark::newCSet() { 3715 if (!concurrent_marking_in_progress()) { 3716 // nothing to do if marking is not in progress 3717 return; 3718 } 3719 3720 // find what the lowest finger is among the global and local fingers 3721 _min_finger = _finger; 3722 for (int i = 0; i < (int)_max_task_num; ++i) { 3723 CMTask* task = _tasks[i]; 3724 HeapWord* task_finger = task->finger(); 3725 if (task_finger != NULL && task_finger < _min_finger) { 3726 _min_finger = task_finger; 3727 } 3728 } 3729 3730 _should_gray_objects = false; 3731 3732 // This fixes a very subtle and frustrating bug. It might be the case 3733 // that, during an evacuation pause, heap regions that contain 3734 // objects that are gray (by being in regions contained in the 3735 // region stack) are included in the collection set. Since such gray 3736 // objects will be moved, and because it's not easy to redirect 3737 // region stack entries to point to a new location (because objects 3738 // in one region might be scattered to multiple regions after they 3739 // are copied), one option is to ensure that all marked objects 3740 // copied during a pause are pushed on the stack. Notice, however, 3741 // that this problem can only happen when the region stack is not 3742 // empty during an evacuation pause. So, we make the fix a bit less 3743 // conservative and ensure that regions are pushed on the stack, 3744 // irrespective of whether all collection set regions are below the 3745 // finger, if the region stack is not empty. This is expected to be 3746 // a rare case, so I don't think it's necessary to be smarter about it. 3747 if (!region_stack_empty() || has_aborted_regions()) { 3748 _should_gray_objects = true; 3749 } 3750 } 3751 3752 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) { 3753 if (!concurrent_marking_in_progress()) return; 3754 3755 HeapWord* region_end = hr->end(); 3756 if (region_end > _min_finger) { 3757 _should_gray_objects = true; 3758 } 3759 } 3760 3761 // Resets the region fields of active CMTasks whose values point 3762 // into the collection set.
3763 void ConcurrentMark::reset_active_task_region_fields_in_cset() { 3764 assert(SafepointSynchronize::is_at_safepoint(), "should be in STW"); 3765 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 3766 3767 for (int i = 0; i < (int)parallel_marking_threads(); i += 1) { 3768 CMTask* task = _tasks[i]; 3769 HeapWord* task_finger = task->finger(); 3770 if (task_finger != NULL) { 3771 assert(_g1h->is_in_g1_reserved(task_finger), "not in heap"); 3772 HeapRegion* finger_region = _g1h->heap_region_containing(task_finger); 3773 if (finger_region->in_collection_set()) { 3774 // The task's current region is in the collection set. 3775 // This region will be evacuated in the current GC and 3776 // the region fields in the task will be stale. 3777 task->giveup_current_region(); 3778 } 3779 } 3780 } 3781 } 3782 3783 // abandon current marking iteration due to a Full GC 3784 void ConcurrentMark::abort() { 3785 // Clear all marks to force marking thread to do nothing 3786 _nextMarkBitMap->clearAll(); 3787 // Empty mark stack 3788 clear_marking_state(); 3789 for (int i = 0; i < (int)_max_task_num; ++i) { 3790 _tasks[i]->clear_region_fields(); 3791 } 3792 _has_aborted = true; 3793 3794 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3795 satb_mq_set.abandon_partial_marking(); 3796 // This can be called either during or outside marking, we'll read 3797 // the expected_active value from the SATB queue set. 3798 satb_mq_set.set_active_all_threads( 3799 false, /* new active value */ 3800 satb_mq_set.is_active() /* expected_active */); 3801 } 3802 3803 static void print_ms_time_info(const char* prefix, const char* name, 3804 NumberSeq& ns) { 3805 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3806 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3807 if (ns.num() > 0) { 3808 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3809 prefix, ns.sd(), ns.maximum()); 3810 } 3811 } 3812 3813 void ConcurrentMark::print_summary_info() { 3814 gclog_or_tty->print_cr(" Concurrent marking:"); 3815 print_ms_time_info(" ", "init marks", _init_times); 3816 print_ms_time_info(" ", "remarks", _remark_times); 3817 { 3818 print_ms_time_info(" ", "final marks", _remark_mark_times); 3819 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3820 3821 } 3822 print_ms_time_info(" ", "cleanups", _cleanup_times); 3823 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3824 _total_counting_time, 3825 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3826 (double)_cleanup_times.num() 3827 : 0.0)); 3828 if (G1ScrubRemSets) { 3829 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3830 _total_rs_scrub_time, 3831 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3832 (double)_cleanup_times.num() 3833 : 0.0)); 3834 } 3835 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3836 (_init_times.sum() + _remark_times.sum() + 3837 _cleanup_times.sum())/1000.0); 3838 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3839 "(%8.2f s marking).", 3840 cmThread()->vtime_accum(), 3841 cmThread()->vtime_mark_accum()); 3842 } 3843 3844 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3845 _parallel_workers->print_worker_threads_on(st); 3846 } 3847 3848 // Closures 3849 // XXX: there seems to be a lot of code duplication here; 3850 // should refactor and consolidate the shared code. 
3851 3852 3853 3854 3855 // We take a break if someone is trying to stop the world. 3856 bool ConcurrentMark::do_yield_check(int worker_i) { 3857 if (should_yield()) { 3858 if (worker_i == 0) { 3859 _g1h->g1_policy()->record_concurrent_pause(); 3860 } 3861 cmThread()->yield(); 3862 if (worker_i == 0) { 3863 _g1h->g1_policy()->record_concurrent_pause_end(); 3864 } 3865 return true; 3866 } else { 3867 return false; 3868 } 3869 } 3870 3871 bool ConcurrentMark::should_yield() { 3872 return cmThread()->should_yield(); 3873 } 3874 3875 bool ConcurrentMark::containing_card_is_marked(void* p) { 3876 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3877 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3878 } 3879 3880 bool ConcurrentMark::containing_cards_are_marked(void* start, 3881 void* last) { 3882 return containing_card_is_marked(start) && 3883 containing_card_is_marked(last); 3884 } 3885 3886 #ifndef PRODUCT 3887 // for debugging purposes 3888 void ConcurrentMark::print_finger() { 3889 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3890 _heap_start, _heap_end, _finger); 3891 for (int i = 0; i < (int) _max_task_num; ++i) { 3892 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3893 } 3894 gclog_or_tty->print_cr(""); 3895 } 3896 #endif 3897 3898 void CMTask::scan_object(oop obj) { 3899 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3900 3901 if (_cm->verbose_high()) { 3902 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3903 _task_id, (void*) obj); 3904 } 3905 3906 size_t obj_size = obj->size(); 3907 _words_scanned += obj_size; 3908 3909 obj->oop_iterate(_cm_oop_closure); 3910 statsOnly( ++_objs_scanned ); 3911 check_limits(); 3912 } 3913 3914 // Closure for iteration over bitmaps 3915 class CMBitMapClosure : public BitMapClosure { 3916 private: 3917 // the bitmap that is being iterated over 3918 CMBitMap* _nextMarkBitMap; 3919 ConcurrentMark* _cm; 3920 CMTask* _task; 3921 // true if we're scanning a heap region claimed by the task (so that 3922 // we move the finger along), false if we're not, i.e. currently when 3923 // scanning a heap region popped from the region stack (so that we 3924 // do not move the task finger along; it'd be a mistake if we did so). 3925 bool _scanning_heap_region; 3926 3927 public: 3928 CMBitMapClosure(CMTask *task, 3929 ConcurrentMark* cm, 3930 CMBitMap* nextMarkBitMap) 3931 : _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { } 3932 3933 void set_scanning_heap_region(bool scanning_heap_region) { 3934 _scanning_heap_region = scanning_heap_region; 3935 } 3936 3937 bool do_bit(size_t offset) { 3938 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3939 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3940 assert(addr < _cm->finger(), "invariant"); 3941 3942 if (_scanning_heap_region) { 3943 statsOnly( _task->increase_objs_found_on_bitmap() ); 3944 assert(addr >= _task->finger(), "invariant"); 3945 // We move the task's local finger along. 3946 _task->move_finger_to(addr); 3947 } else { 3948 // We move the task's region finger along.
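// (The region finger tracks our position within a region popped off
// the region stack, whereas the local finger above tracks our
// position within a heap region the task has claimed; see
// drain_region_stack() and do_marking_step().)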
_task->move_region_finger_to(addr); 3950 } 3951 3952 _task->scan_object(oop(addr)); 3953 // we only partially drain the local queue and global stack 3954 _task->drain_local_queue(true); 3955 _task->drain_global_stack(true); 3956 3957 // if the has_aborted flag has been raised, we need to bail out of 3958 // the iteration 3959 return !_task->has_aborted(); 3960 } 3961 }; 3962 3963 // Closure for iterating over objects, currently only used for 3964 // processing SATB buffers. 3965 class CMObjectClosure : public ObjectClosure { 3966 private: 3967 CMTask* _task; 3968 3969 public: 3970 void do_object(oop obj) { 3971 _task->deal_with_reference(obj); 3972 } 3973 3974 CMObjectClosure(CMTask* task) : _task(task) { } 3975 }; 3976 3977 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3978 ConcurrentMark* cm, 3979 CMTask* task) 3980 : _g1h(g1h), _cm(cm), _task(task) { 3981 assert(_ref_processor == NULL, "should be initialized to NULL"); 3982 3983 if (G1UseConcMarkReferenceProcessing) { 3984 _ref_processor = g1h->ref_processor_cm(); 3985 assert(_ref_processor != NULL, "should not be NULL"); 3986 } 3987 } 3988 3989 void CMTask::setup_for_region(HeapRegion* hr) { 3990 // Separated the asserts so that we know which one fires. 3991 assert(hr != NULL, 3992 "claim_region() should have filtered out NULL regions"); 3993 assert(!hr->continuesHumongous(), 3994 "claim_region() should have filtered out continues humongous regions"); 3995 3996 if (_cm->verbose_low()) { 3997 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 3998 _task_id, hr); 3999 } 4000 4001 _curr_region = hr; 4002 _finger = hr->bottom(); 4003 update_region_limit(); 4004 } 4005 4006 void CMTask::update_region_limit() { 4007 HeapRegion* hr = _curr_region; 4008 HeapWord* bottom = hr->bottom(); 4009 HeapWord* limit = hr->next_top_at_mark_start(); 4010 4011 if (limit == bottom) { 4012 if (_cm->verbose_low()) { 4013 gclog_or_tty->print_cr("[%d] found an empty region " 4014 "["PTR_FORMAT", "PTR_FORMAT")", 4015 _task_id, bottom, limit); 4016 } 4017 // The region was collected underneath our feet. 4018 // We set the finger to bottom to ensure that the bitmap 4019 // iteration that will follow this will not do anything. 4020 // (this is not a condition that holds when we set the region up, 4021 // as the region is not supposed to be empty in the first place) 4022 _finger = bottom; 4023 } else if (limit >= _region_limit) { 4024 assert(limit >= _finger, "peace of mind"); 4025 } else { 4026 assert(limit < _region_limit, "only way to get here"); 4027 // This can happen under some pretty unusual circumstances. An 4028 // evacuation pause empties the region underneath our feet (NTAMS 4029 // at bottom). We then do some allocation in the region (NTAMS 4030 // stays at bottom), followed by the region being used as a GC 4031 // alloc region (NTAMS will move to top() and the objects 4032 // originally below it will be grayed). All objects now marked in 4033 // the region are explicitly grayed, if below the global finger, 4034 // and in fact we do not need to scan anything else. So, we simply 4035 // set _finger to be limit to ensure that the bitmap iteration 4036 // doesn't do anything.
4037 _finger = limit; 4038 } 4039 4040 _region_limit = limit; 4041 } 4042 4043 void CMTask::giveup_current_region() { 4044 assert(_curr_region != NULL, "invariant"); 4045 if (_cm->verbose_low()) { 4046 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 4047 _task_id, _curr_region); 4048 } 4049 clear_region_fields(); 4050 } 4051 4052 void CMTask::clear_region_fields() { 4053 // Values for these three fields that indicate that we're not 4054 // holding on to a region. 4055 _curr_region = NULL; 4056 _finger = NULL; 4057 _region_limit = NULL; 4058 4059 _region_finger = NULL; 4060 } 4061 4062 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 4063 if (cm_oop_closure == NULL) { 4064 assert(_cm_oop_closure != NULL, "invariant"); 4065 } else { 4066 assert(_cm_oop_closure == NULL, "invariant"); 4067 } 4068 _cm_oop_closure = cm_oop_closure; 4069 } 4070 4071 void CMTask::reset(CMBitMap* nextMarkBitMap) { 4072 guarantee(nextMarkBitMap != NULL, "invariant"); 4073 4074 if (_cm->verbose_low()) { 4075 gclog_or_tty->print_cr("[%d] resetting", _task_id); 4076 } 4077 4078 _nextMarkBitMap = nextMarkBitMap; 4079 clear_region_fields(); 4080 assert(_aborted_region.is_empty(), "should have been cleared"); 4081 4082 _calls = 0; 4083 _elapsed_time_ms = 0.0; 4084 _termination_time_ms = 0.0; 4085 _termination_start_time_ms = 0.0; 4086 4087 #if _MARKING_STATS_ 4088 _local_pushes = 0; 4089 _local_pops = 0; 4090 _local_max_size = 0; 4091 _objs_scanned = 0; 4092 _global_pushes = 0; 4093 _global_pops = 0; 4094 _global_max_size = 0; 4095 _global_transfers_to = 0; 4096 _global_transfers_from = 0; 4097 _region_stack_pops = 0; 4098 _regions_claimed = 0; 4099 _objs_found_on_bitmap = 0; 4100 _satb_buffers_processed = 0; 4101 _steal_attempts = 0; 4102 _steals = 0; 4103 _aborted = 0; 4104 _aborted_overflow = 0; 4105 _aborted_cm_aborted = 0; 4106 _aborted_yield = 0; 4107 _aborted_timed_out = 0; 4108 _aborted_satb = 0; 4109 _aborted_termination = 0; 4110 #endif // _MARKING_STATS_ 4111 } 4112 4113 bool CMTask::should_exit_termination() { 4114 regular_clock_call(); 4115 // This is called when we are in the termination protocol. We should 4116 // quit if, for some reason, this task wants to abort or the global 4117 // stack is not empty (this means that we can get work from it). 4118 return !_cm->mark_stack_empty() || has_aborted(); 4119 } 4120 4121 void CMTask::reached_limit() { 4122 assert(_words_scanned >= _words_scanned_limit || 4123 _refs_reached >= _refs_reached_limit , 4124 "shouldn't have been called otherwise"); 4125 regular_clock_call(); 4126 } 4127 4128 void CMTask::regular_clock_call() { 4129 if (has_aborted()) return; 4130 4131 // First, we need to recalculate the words scanned and refs reached 4132 // limits for the next clock call. 4133 recalculate_limits(); 4134 4135 // During the regular clock call we do the following 4136 4137 // (1) If an overflow has been flagged, then we abort. 4138 if (_cm->has_overflown()) { 4139 set_has_aborted(); 4140 return; 4141 } 4142 4143 // If we are not concurrent (i.e. we're doing remark) we don't need 4144 // to check anything else. The other steps are only needed during 4145 // the concurrent marking phase. 4146 if (!concurrent()) return; 4147 4148 // (2) If marking has been aborted for Full GC, then we also abort. 
if (_cm->has_aborted()) { 4150 set_has_aborted(); 4151 statsOnly( ++_aborted_cm_aborted ); 4152 return; 4153 } 4154 4155 double curr_time_ms = os::elapsedVTime() * 1000.0; 4156 4157 // (3) If marking stats are enabled, then we update the step history. 4158 #if _MARKING_STATS_ 4159 if (_words_scanned >= _words_scanned_limit) { 4160 ++_clock_due_to_scanning; 4161 } 4162 if (_refs_reached >= _refs_reached_limit) { 4163 ++_clock_due_to_marking; 4164 } 4165 4166 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 4167 _interval_start_time_ms = curr_time_ms; 4168 _all_clock_intervals_ms.add(last_interval_ms); 4169 4170 if (_cm->verbose_medium()) { 4171 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 4172 "scanned = %d%s, refs reached = %d%s", 4173 _task_id, last_interval_ms, 4174 _words_scanned, 4175 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 4176 _refs_reached, 4177 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 4178 } 4179 #endif // _MARKING_STATS_ 4180 4181 // (4) We check whether we should yield. If we have to, then we abort. 4182 if (_cm->should_yield()) { 4183 // We should yield. To do this we abort the task. The caller is 4184 // responsible for yielding. 4185 set_has_aborted(); 4186 statsOnly( ++_aborted_yield ); 4187 return; 4188 } 4189 4190 // (5) We check whether we've reached our time quota. If we have, 4191 // then we abort. 4192 double elapsed_time_ms = curr_time_ms - _start_time_ms; 4193 if (elapsed_time_ms > _time_target_ms) { 4194 set_has_aborted(); 4195 _has_timed_out = true; 4196 statsOnly( ++_aborted_timed_out ); 4197 return; 4198 } 4199 4200 // (6) Finally, we check whether there are enough completed SATB 4201 // buffers available for processing. If there are, we abort. 4202 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 4203 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 4204 if (_cm->verbose_low()) { 4205 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 4206 _task_id); 4207 } 4208 // we do need to process SATB buffers, we'll abort and restart 4209 // the marking task to do so 4210 set_has_aborted(); 4211 statsOnly( ++_aborted_satb ); 4212 return; 4213 } 4214 } 4215 4216 void CMTask::recalculate_limits() { 4217 _real_words_scanned_limit = _words_scanned + words_scanned_period; 4218 _words_scanned_limit = _real_words_scanned_limit; 4219 4220 _real_refs_reached_limit = _refs_reached + refs_reached_period; 4221 _refs_reached_limit = _real_refs_reached_limit; 4222 } 4223 4224 void CMTask::decrease_limits() { 4225 // This is called when we believe that we're going to do an infrequent 4226 // operation which will increase the per-byte scanned cost (i.e. move 4227 // entries to/from the global stack). It basically tries to decrease the 4228 // scanning limit so that the clock is called earlier.
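// For example (illustrative numbers only): with a words-scanned
// period of 12K words, the adjusted limit below sits 9K words below
// the real limit, so a task whose limits were just recalculated
// would trigger the clock after scanning only 3K more words instead
// of a full period.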
4229 4230 if (_cm->verbose_medium()) { 4231 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 4232 } 4233 4234 _words_scanned_limit = _real_words_scanned_limit - 4235 3 * words_scanned_period / 4; 4236 _refs_reached_limit = _real_refs_reached_limit - 4237 3 * refs_reached_period / 4; 4238 } 4239 4240 void CMTask::move_entries_to_global_stack() { 4241 // local array where we'll store the entries that will be popped 4242 // from the local queue 4243 oop buffer[global_stack_transfer_size]; 4244 4245 int n = 0; 4246 oop obj; 4247 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 4248 buffer[n] = obj; 4249 ++n; 4250 } 4251 4252 if (n > 0) { 4253 // we popped at least one entry from the local queue 4254 4255 statsOnly( ++_global_transfers_to; _local_pops += n ); 4256 4257 if (!_cm->mark_stack_push(buffer, n)) { 4258 if (_cm->verbose_low()) { 4259 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", 4260 _task_id); 4261 } 4262 set_has_aborted(); 4263 } else { 4264 // the transfer was successful 4265 4266 if (_cm->verbose_medium()) { 4267 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 4268 _task_id, n); 4269 } 4270 statsOnly( int tmp_size = _cm->mark_stack_size(); 4271 if (tmp_size > _global_max_size) { 4272 _global_max_size = tmp_size; 4273 } 4274 _global_pushes += n ); 4275 } 4276 } 4277 4278 // this operation was quite expensive, so decrease the limits 4279 decrease_limits(); 4280 } 4281 4282 void CMTask::get_entries_from_global_stack() { 4283 // local array where we'll store the entries that will be popped 4284 // from the global stack. 4285 oop buffer[global_stack_transfer_size]; 4286 int n; 4287 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 4288 assert(n <= global_stack_transfer_size, 4289 "we should not pop more than the given limit"); 4290 if (n > 0) { 4291 // yes, we did actually pop at least one entry 4292 4293 statsOnly( ++_global_transfers_from; _global_pops += n ); 4294 if (_cm->verbose_medium()) { 4295 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 4296 _task_id, n); 4297 } 4298 for (int i = 0; i < n; ++i) { 4299 bool success = _task_queue->push(buffer[i]); 4300 // We only call this when the local queue is empty or under a 4301 // given target limit. So, we do not expect this push to fail. 4302 assert(success, "invariant"); 4303 } 4304 4305 statsOnly( int tmp_size = _task_queue->size(); 4306 if (tmp_size > _local_max_size) { 4307 _local_max_size = tmp_size; 4308 } 4309 _local_pushes += n ); 4310 } 4311 4312 // this operation was quite expensive, so decrease the limits 4313 decrease_limits(); 4314 } 4315 4316 void CMTask::drain_local_queue(bool partially) { 4317 if (has_aborted()) return; 4318 4319 // Decide what the target size is, depending on whether we're going to 4320 // drain it partially (so that other tasks can steal if they run out 4321 // of things to do) or totally (at the very end).
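// For instance (illustrative values only): with a queue capacity of
// 16K entries and GCDrainStackTargetSize at an assumed value of 64,
// the partial target below is MIN2(16K / 3, 64) == 64, so a partial
// drain stops once at most 64 entries remain, leaving them
// available for stealing by other tasks.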
size_t target_size; 4323 if (partially) { 4324 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 4325 } else { 4326 target_size = 0; 4327 } 4328 4329 if (_task_queue->size() > target_size) { 4330 if (_cm->verbose_high()) { 4331 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", 4332 _task_id, target_size); 4333 } 4334 4335 oop obj; 4336 bool ret = _task_queue->pop_local(obj); 4337 while (ret) { 4338 statsOnly( ++_local_pops ); 4339 4340 if (_cm->verbose_high()) { 4341 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 4342 (void*) obj); 4343 } 4344 4345 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 4346 assert(!_g1h->is_on_master_free_list( 4347 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 4348 4349 scan_object(obj); 4350 4351 if (_task_queue->size() <= target_size || has_aborted()) { 4352 ret = false; 4353 } else { 4354 ret = _task_queue->pop_local(obj); 4355 } 4356 } 4357 4358 if (_cm->verbose_high()) { 4359 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 4360 _task_id, _task_queue->size()); 4361 } 4362 } 4363 } 4364 4365 void CMTask::drain_global_stack(bool partially) { 4366 if (has_aborted()) return; 4367 4368 // We have a policy to drain the local queue before we attempt to 4369 // drain the global stack. 4370 assert(partially || _task_queue->size() == 0, "invariant"); 4371 4372 // Decide what the target size is, depending on whether we're going to 4373 // drain it partially (so that other tasks can steal if they run out 4374 // of things to do) or totally (at the very end). Notice that, 4375 // because we move entries from the global stack in chunks or 4376 // because another task might be doing the same, we might in fact 4377 // drop below the target. But, this is not a problem. 4378 size_t target_size; 4379 if (partially) { 4380 target_size = _cm->partial_mark_stack_size_target(); 4381 } else { 4382 target_size = 0; 4383 } 4384 4385 if (_cm->mark_stack_size() > target_size) { 4386 if (_cm->verbose_low()) { 4387 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", 4388 _task_id, target_size); 4389 } 4390 4391 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 4392 get_entries_from_global_stack(); 4393 drain_local_queue(partially); 4394 } 4395 4396 if (_cm->verbose_low()) { 4397 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 4398 _task_id, _cm->mark_stack_size()); 4399 } 4400 } 4401 } 4402 4403 // SATB Queue has several assumptions on whether to call the par or 4404 // non-par versions of the methods. This is why some of the code is 4405 // replicated. We should really get rid of the single-threaded version 4406 // of the code to simplify things. 4407 void CMTask::drain_satb_buffers() { 4408 if (has_aborted()) return; 4409 4410 // We set this so that the regular clock knows that we're in the 4411 // middle of draining buffers and doesn't set the abort flag when it 4412 // notices that SATB buffers are available for draining. It'd be 4413 // very counterproductive if it did that. :-) 4414 _draining_satb_buffers = true; 4415 4416 CMObjectClosure oc(this); 4417 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 4418 if (G1CollectedHeap::use_parallel_gc_threads()) { 4419 satb_mq_set.set_par_closure(_task_id, &oc); 4420 } else { 4421 satb_mq_set.set_closure(&oc); 4422 } 4423 4424 // This keeps claiming and applying the closure to completed buffers 4425 // until we run out of buffers or we need to abort.
4426 if (G1CollectedHeap::use_parallel_gc_threads()) { 4427 while (!has_aborted() && 4428 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 4429 if (_cm->verbose_medium()) { 4430 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 4431 } 4432 statsOnly( ++_satb_buffers_processed ); 4433 regular_clock_call(); 4434 } 4435 } else { 4436 while (!has_aborted() && 4437 satb_mq_set.apply_closure_to_completed_buffer()) { 4438 if (_cm->verbose_medium()) { 4439 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 4440 } 4441 statsOnly( ++_satb_buffers_processed ); 4442 regular_clock_call(); 4443 } 4444 } 4445 4446 if (!concurrent() && !has_aborted()) { 4447 // We should only do this during remark. 4448 if (G1CollectedHeap::use_parallel_gc_threads()) { 4449 satb_mq_set.par_iterate_closure_all_threads(_task_id); 4450 } else { 4451 satb_mq_set.iterate_closure_all_threads(); 4452 } 4453 } 4454 4455 _draining_satb_buffers = false; 4456 4457 assert(has_aborted() || 4458 concurrent() || 4459 satb_mq_set.completed_buffers_num() == 0, "invariant"); 4460 4461 if (G1CollectedHeap::use_parallel_gc_threads()) { 4462 satb_mq_set.set_par_closure(_task_id, NULL); 4463 } else { 4464 satb_mq_set.set_closure(NULL); 4465 } 4466 4467 // again, this was a potentially expensive operation, decrease the 4468 // limits to get the regular clock call early 4469 decrease_limits(); 4470 } 4471 4472 void CMTask::drain_region_stack(BitMapClosure* bc) { 4473 if (has_aborted()) return; 4474 4475 assert(_region_finger == NULL, 4476 "it should be NULL when we're not scanning a region"); 4477 4478 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) { 4479 if (_cm->verbose_low()) { 4480 gclog_or_tty->print_cr("[%d] draining region stack, size = %d", 4481 _task_id, _cm->region_stack_size()); 4482 } 4483 4484 MemRegion mr; 4485 4486 if (!_aborted_region.is_empty()) { 4487 mr = _aborted_region; 4488 _aborted_region = MemRegion(); 4489 4490 if (_cm->verbose_low()) { 4491 gclog_or_tty->print_cr("[%d] scanning aborted region " 4492 "[ " PTR_FORMAT ", " PTR_FORMAT " )", 4493 _task_id, mr.start(), mr.end()); 4494 } 4495 } else { 4496 mr = _cm->region_stack_pop_lock_free(); 4497 // it returns MemRegion() if the pop fails 4498 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 4499 } 4500 4501 while (mr.start() != NULL) { 4502 if (_cm->verbose_medium()) { 4503 gclog_or_tty->print_cr("[%d] we are scanning region " 4504 "["PTR_FORMAT", "PTR_FORMAT")", 4505 _task_id, mr.start(), mr.end()); 4506 } 4507 4508 assert(mr.end() <= _cm->finger(), 4509 "otherwise the region shouldn't be on the stack"); 4510 assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); 4511 if (_nextMarkBitMap->iterate(bc, mr)) { 4512 assert(!has_aborted(), 4513 "cannot abort the task without aborting the bitmap iteration"); 4514 4515 // We finished iterating over the region without aborting. 4516 regular_clock_call(); 4517 if (has_aborted()) { 4518 mr = MemRegion(); 4519 } else { 4520 mr = _cm->region_stack_pop_lock_free(); 4521 // it returns MemRegion() if the pop fails 4522 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 4523 } 4524 } else { 4525 assert(has_aborted(), "currently the only way to do so"); 4526 4527 // The only way to abort the bitmap iteration is to return 4528 // false from the do_bit() method. However, inside the 4529 // do_bit() method we move the _region_finger to point to the 4530 // object currently being looked at. 
So, if we bail out, we 4531 // have definitely set _region_finger to something non-null. 4532 assert(_region_finger != NULL, "invariant"); 4533 4534 // Make sure that any previously aborted region has been 4535 // cleared. 4536 assert(_aborted_region.is_empty(), "aborted region not cleared"); 4537 4538 // The iteration was actually aborted. So now _region_finger 4539 // points to the address of the object we last scanned. If we 4540 // leave it there, when we restart this task, we will rescan 4541 // the object. It is easy to avoid this. We move the finger by 4542 // enough to point to the next possible object header (the 4543 // bitmap knows by how much we need to move it as it knows its 4544 // granularity). 4545 MemRegion newRegion = 4546 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); 4547 4548 if (!newRegion.is_empty()) { 4549 if (_cm->verbose_low()) { 4550 gclog_or_tty->print_cr("[%d] recording unscanned region" 4551 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask", 4552 _task_id, 4553 newRegion.start(), newRegion.end()); 4554 } 4555 // Now record the part of the region we didn't scan to 4556 // make sure this task scans it later. 4557 _aborted_region = newRegion; 4558 } 4559 // break from while 4560 mr = MemRegion(); 4561 } 4562 _region_finger = NULL; 4563 } 4564 4565 if (_cm->verbose_low()) { 4566 gclog_or_tty->print_cr("[%d] drained region stack, size = %d", 4567 _task_id, _cm->region_stack_size()); 4568 } 4569 } 4570 } 4571 4572 void CMTask::print_stats() { 4573 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 4574 _task_id, _calls); 4575 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 4576 _elapsed_time_ms, _termination_time_ms); 4577 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4578 _step_times_ms.num(), _step_times_ms.avg(), 4579 _step_times_ms.sd()); 4580 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4581 _step_times_ms.maximum(), _step_times_ms.sum()); 4582 4583 #if _MARKING_STATS_ 4584 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4585 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 4586 _all_clock_intervals_ms.sd()); 4587 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4588 _all_clock_intervals_ms.maximum(), 4589 _all_clock_intervals_ms.sum()); 4590 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 4591 _clock_due_to_scanning, _clock_due_to_marking); 4592 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 4593 _objs_scanned, _objs_found_on_bitmap); 4594 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 4595 _local_pushes, _local_pops, _local_max_size); 4596 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 4597 _global_pushes, _global_pops, _global_max_size); 4598 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 4599 _global_transfers_to,_global_transfers_from); 4600 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", 4601 _regions_claimed, _region_stack_pops); 4602 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 4603 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 4604 _steal_attempts, _steals); 4605 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 4606 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 4607 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 4608 
gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 4609 _aborted_timed_out, _aborted_satb, _aborted_termination); 4610 #endif // _MARKING_STATS_ 4611 } 4612 4613 /***************************************************************************** 4614 4615 The do_marking_step(time_target_ms) method is the building block 4616 of the parallel marking framework. It can be called in parallel 4617 with other invocations of do_marking_step() on different tasks 4618 (but only one per task, obviously) and concurrently with the 4619 mutator threads, or during remark, hence it eliminates the need 4620 for two versions of the code. When called during remark, it will 4621 pick up from where the task left off during the concurrent marking 4622 phase. Interestingly, tasks are also claimable during evacuation 4623 pauses too, since do_marking_step() ensures that it aborts before 4624 it needs to yield. 4625 4626 The data structures that is uses to do marking work are the 4627 following: 4628 4629 (1) Marking Bitmap. If there are gray objects that appear only 4630 on the bitmap (this happens either when dealing with an overflow 4631 or when the initial marking phase has simply marked the roots 4632 and didn't push them on the stack), then tasks claim heap 4633 regions whose bitmap they then scan to find gray objects. A 4634 global finger indicates where the end of the last claimed region 4635 is. A local finger indicates how far into the region a task has 4636 scanned. The two fingers are used to determine how to gray an 4637 object (i.e. whether simply marking it is OK, as it will be 4638 visited by a task in the future, or whether it needs to be also 4639 pushed on a stack). 4640 4641 (2) Local Queue. The local queue of the task which is accessed 4642 reasonably efficiently by the task. Other tasks can steal from 4643 it when they run out of work. Throughout the marking phase, a 4644 task attempts to keep its local queue short but not totally 4645 empty, so that entries are available for stealing by other 4646 tasks. Only when there is no more work, a task will totally 4647 drain its local queue. 4648 4649 (3) Global Mark Stack. This handles local queue overflow. During 4650 marking only sets of entries are moved between it and the local 4651 queues, as access to it requires a mutex and more fine-grain 4652 interaction with it which might cause contention. If it 4653 overflows, then the marking phase should restart and iterate 4654 over the bitmap to identify gray objects. Throughout the marking 4655 phase, tasks attempt to keep the global mark stack at a small 4656 length but not totally empty, so that entries are available for 4657 popping by other tasks. Only when there is no more work, tasks 4658 will totally drain the global mark stack. 4659 4660 (4) Global Region Stack. Entries on it correspond to areas of 4661 the bitmap that need to be scanned since they contain gray 4662 objects. Pushes on the region stack only happen during 4663 evacuation pauses and typically correspond to areas covered by 4664 GC LABS. If it overflows, then the marking phase should restart 4665 and iterate over the bitmap to identify gray objects. Tasks will 4666 try to totally drain the region stack as soon as possible. 4667 4668 (5) SATB Buffer Queue. This is where completed SATB buffers are 4669 made available. Buffers are regularly removed from this queue 4670 and scanned for roots, so that the queue doesn't get too 4671 long. 
During remark, all completed buffers are processed, as 4672 well as the filled in parts of any uncompleted buffers. 4673 4674 The do_marking_step() method tries to abort when the time target 4675 has been reached. There are a few other cases when the 4676 do_marking_step() method also aborts: 4677 4678 (1) When the marking phase has been aborted (after a Full GC). 4679 4680 (2) When a global overflow (either on the global stack or the 4681 region stack) has been triggered. Before the task aborts, it 4682 will actually sync up with the other tasks to ensure that all 4683 the marking data structures (local queues, stacks, fingers etc.) 4684 are re-initialised so that when do_marking_step() completes, 4685 the marking phase can immediately restart. 4686 4687 (3) When enough completed SATB buffers are available. The 4688 do_marking_step() method only tries to drain SATB buffers right 4689 at the beginning. So, if enough buffers are available, the 4690 marking step aborts and the SATB buffers are processed at 4691 the beginning of the next invocation. 4692 4693 (4) To yield. When we have to yield, we abort and yield 4694 right at the end of do_marking_step(). This saves us from a lot 4695 of hassle as, by yielding we might allow a Full GC. If this 4696 happens then objects will be compacted underneath our feet, the 4697 heap might shrink, etc. We save checking for this by just 4698 aborting and doing the yield right at the end. 4699 4700 From the above it follows that the do_marking_step() method should 4701 be called in a loop (or, otherwise, regularly) until it completes. 4702 4703 If a marking step completes without its has_aborted() flag being 4704 true, it means it has completed the current marking phase (and 4705 also all other marking tasks have done so and have all synced up). 4706 4707 A method called regular_clock_call() is invoked "regularly" (in 4708 sub ms intervals) throughout marking. It is this clock method that 4709 checks all the abort conditions which were mentioned above and 4710 decides when the task should abort. A work-based scheme is used to 4711 trigger this clock method: when the number of object words the 4712 marking phase has scanned or the number of references the marking 4713 phase has visited reaches a given limit. Additional invocations of 4714 the clock method have been planted in a few other strategic places 4715 too. The initial reason for the clock method was to avoid calling 4716 vtime too regularly, as it is quite expensive. So, once it was in 4717 place, it was natural to piggy-back all the other conditions on it 4718 too and not constantly check them throughout the code.
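   As an illustration (a sketch only; the actual driver loops live in
   the concurrent marking and remark worker tasks elsewhere in this
   file, and the names task/cm below are placeholders), a caller is
   expected to drive the method roughly like this:

     do {
       task->do_marking_step(10.0 /* target ms, illustrative value */,
                             true /* do_stealing */,
                             true /* do_termination */);
       // ...yield to a pending safepoint, handle overflow, etc...
     } while (task->has_aborted() && !cm->has_aborted());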
4719 4720 *****************************************************************************/ 4721 4722 void CMTask::do_marking_step(double time_target_ms, 4723 bool do_stealing, 4724 bool do_termination) { 4725 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 4726 assert(concurrent() == _cm->concurrent(), "they should be the same"); 4727 4728 assert(concurrent() || _cm->region_stack_empty(), 4729 "the region stack should have been cleared before remark"); 4730 assert(concurrent() || !_cm->has_aborted_regions(), 4731 "aborted regions should have been cleared before remark"); 4732 assert(_region_finger == NULL, 4733 "this should be non-null only when a region is being scanned"); 4734 4735 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 4736 assert(_task_queues != NULL, "invariant"); 4737 assert(_task_queue != NULL, "invariant"); 4738 assert(_task_queues->queue(_task_id) == _task_queue, "invariant"); 4739 4740 assert(!_claimed, 4741 "only one thread should claim this task at any one time"); 4742 4743 // OK, this doesn't safeguard against all possible scenarios, as it is 4744 // possible for two threads to set the _claimed flag at the same 4745 // time. But it is only for debugging purposes anyway and it will 4746 // catch most problems. 4747 _claimed = true; 4748 4749 _start_time_ms = os::elapsedVTime() * 1000.0; 4750 statsOnly( _interval_start_time_ms = _start_time_ms ); 4751 4752 double diff_prediction_ms = 4753 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 4754 _time_target_ms = time_target_ms - diff_prediction_ms; 4755 4756 // set up the variables that are used in the work-based scheme to 4757 // call the regular clock method 4758 _words_scanned = 0; 4759 _refs_reached = 0; 4760 recalculate_limits(); 4761 4762 // clear all flags 4763 clear_has_aborted(); 4764 _has_timed_out = false; 4765 _draining_satb_buffers = false; 4766 4767 ++_calls; 4768 4769 if (_cm->verbose_low()) { 4770 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 4771 "target = %1.2lfms >>>>>>>>>>", 4772 _task_id, _calls, _time_target_ms); 4773 } 4774 4775 // Set up the bitmap and oop closures. Anything that uses them is 4776 // eventually called from this method, so it is OK to allocate these 4777 // statically. 4778 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 4779 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 4780 set_cm_oop_closure(&cm_oop_closure); 4781 4782 if (_cm->has_overflown()) { 4783 // This can happen if the region stack or the mark stack overflows 4784 // during a GC pause and this task, after a yield point, 4785 // restarts. We have to abort as we need to get into the overflow 4786 // protocol which happens right at the end of this task. 4787 set_has_aborted(); 4788 } 4789 4790 // First drain any available SATB buffers. After this, we will not 4791 // look at SATB buffers before the next invocation of this method. 4792 // If enough completed SATB buffers are queued up, the regular clock 4793 // will abort this task so that it restarts. 4794 drain_satb_buffers(); 4795 // ...then partially drain the local queue and the global stack 4796 drain_local_queue(true); 4797 drain_global_stack(true); 4798 4799 // Then totally drain the region stack. We will not look at 4800 // it again before the next invocation of this method. Entries on 4801 // the region stack are only added during evacuation pauses, for 4802 // which we have to yield. When we do, we abort the task anyway so 4803 // it will look at the region stack again when it restarts.
4804 bitmap_closure.set_scanning_heap_region(false); 4805 drain_region_stack(&bitmap_closure); 4806 // ...then partially drain the local queue and the global stack 4807 drain_local_queue(true); 4808 drain_global_stack(true); 4809 4810 do { 4811 if (!has_aborted() && _curr_region != NULL) { 4812 // This means that we're already holding on to a region. 4813 assert(_finger != NULL, "if region is not NULL, then the finger " 4814 "should not be NULL either"); 4815 4816 // We might have restarted this task after an evacuation pause 4817 // which might have evacuated the region we're holding on to 4818 // underneath our feet. Let's read its limit again to make sure 4819 // that we do not iterate over a region of the heap that 4820 // contains garbage (update_region_limit() will also move 4821 // _finger to the start of the region if it is found empty). 4822 update_region_limit(); 4823 // We will start from _finger not from the start of the region, 4824 // as we might be restarting this task after aborting half-way 4825 // through scanning this region. In this case, _finger points to 4826 // the address where we last found a marked object. If this is a 4827 // fresh region, _finger points to start(). 4828 MemRegion mr = MemRegion(_finger, _region_limit); 4829 4830 if (_cm->verbose_low()) { 4831 gclog_or_tty->print_cr("[%d] we're scanning part " 4832 "["PTR_FORMAT", "PTR_FORMAT") " 4833 "of region "PTR_FORMAT, 4834 _task_id, _finger, _region_limit, _curr_region); 4835 } 4836 4837 // Let's iterate over the bitmap of the part of the 4838 // region that is left. 4839 bitmap_closure.set_scanning_heap_region(true); 4840 if (mr.is_empty() || 4841 _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4842 // We successfully completed iterating over the region. Now, 4843 // let's give up the region. 4844 giveup_current_region(); 4845 regular_clock_call(); 4846 } else { 4847 assert(has_aborted(), "currently the only way to do so"); 4848 // The only way to abort the bitmap iteration is to return 4849 // false from the do_bit() method. However, inside the 4850 // do_bit() method we move the _finger to point to the 4851 // object currently being looked at. So, if we bail out, we 4852 // have definitely set _finger to something non-null. 4853 assert(_finger != NULL, "invariant"); 4854 4855 // Region iteration was actually aborted. So now _finger 4856 // points to the address of the object we last scanned. If we 4857 // leave it there, when we restart this task, we will rescan 4858 // the object. It is easy to avoid this. We move the finger by 4859 // enough to point to the next possible object header (the 4860 // bitmap knows by how much we need to move it as it knows its 4861 // granularity). 4862 assert(_finger < _region_limit, "invariant"); 4863 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4864 // Check if bitmap iteration was aborted while scanning the last object 4865 if (new_finger >= _region_limit) { 4866 giveup_current_region(); 4867 } else { 4868 move_finger_to(new_finger); 4869 } 4870 } 4871 } 4872 // At this point we have either completed iterating over the 4873 // region we were holding on to, or we have aborted. 4874 4875 // We then partially drain the local queue and the global stack. 4876 // (Do we really need this?) 
4877 drain_local_queue(true); 4878 drain_global_stack(true); 4879 4880 // Read the note on the claim_region() method on why it might 4881 // return NULL with potentially more regions available for 4882 // claiming and why we have to check out_of_regions() to determine 4883 // whether we're done or not. 4884 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4885 // We are going to try to claim a new region. We should have 4886 // given up on the previous one. 4887 // Separated the asserts so that we know which one fires. 4888 assert(_curr_region == NULL, "invariant"); 4889 assert(_finger == NULL, "invariant"); 4890 assert(_region_limit == NULL, "invariant"); 4891 if (_cm->verbose_low()) { 4892 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 4893 } 4894 HeapRegion* claimed_region = _cm->claim_region(_task_id); 4895 if (claimed_region != NULL) { 4896 // Yes, we managed to claim one 4897 statsOnly( ++_regions_claimed ); 4898 4899 if (_cm->verbose_low()) { 4900 gclog_or_tty->print_cr("[%d] we successfully claimed " 4901 "region "PTR_FORMAT, 4902 _task_id, claimed_region); 4903 } 4904 4905 setup_for_region(claimed_region); 4906 assert(_curr_region == claimed_region, "invariant"); 4907 } 4908 // It is important to call the regular clock here. It might take 4909 // a while to claim a region if, for example, we hit a large 4910 // block of empty regions. So we need to call the regular clock 4911 // method once round the loop to make sure it's called 4912 // frequently enough. 4913 regular_clock_call(); 4914 } 4915 4916 if (!has_aborted() && _curr_region == NULL) { 4917 assert(_cm->out_of_regions(), 4918 "at this point we should be out of regions"); 4919 } 4920 } while ( _curr_region != NULL && !has_aborted()); 4921 4922 if (!has_aborted()) { 4923 // We cannot check whether the global stack is empty, since other 4924 // tasks might be pushing objects to it concurrently. We also cannot 4925 // check if the region stack is empty because if a thread is aborting 4926 // it can push a partially done region back. 4927 assert(_cm->out_of_regions(), 4928 "at this point we should be out of regions"); 4929 4930 if (_cm->verbose_low()) { 4931 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); 4932 } 4933 4934 // Try to reduce the number of available SATB buffers so that 4935 // remark has less work to do. 4936 drain_satb_buffers(); 4937 } 4938 4939 // Since we've done everything else, we can now totally drain the 4940 // local queue and global stack. 4941 drain_local_queue(false); 4942 drain_global_stack(false); 4943 4944 // Attempt at work stealing from other task's queues. 4945 if (do_stealing && !has_aborted()) { 4946 // We have not aborted. This means that we have finished all that 4947 // we could. Let's try to do some stealing... 4948 4949 // We cannot check whether the global stack is empty, since other 4950 // tasks might be pushing objects to it concurrently. We also cannot 4951 // check if the region stack is empty because if a thread is aborting 4952 // it can push a partially done region back. 
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // Let's allow task 0 to do this.
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
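      // Taken together, the guarantees below spell out the global
      // termination invariant: no regions left to scan, no pending
      // entries on any stack or queue, and no overflow signalled.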
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task. Our
      // caller will restart it and we can hopefully find more things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes, to make sure that a pointer to the
  // closure which was allocated on this frame's stack doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
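      // In sketch form (a recap of the calls above, not new logic):
      //
      //   enter_first_sync_barrier(id);   // wait: all tasks stop marking
      //   clear_region_fields();          // reset this task's local state
      //   enter_second_sync_barrier(id);  // wait: all tasks are reset
      //
      // No task can leave the second barrier while another is still
      // clearing its state, which is what makes the restart safe.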
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            " %-4s"
#define G1PPRL_TYPE_H_FORMAT          " %4s"
#define G1PPRL_BYTE_FORMAT            " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          " %9s"
#define G1PPRL_DOUBLE_FORMAT          " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
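  // For illustration (all values below are made up), the header comes
  // out roughly as:
  //
  //   ### PHASE Post-Marking @ 12.345
  //   ### HEAP committed: 0xf0000000-0xf8000000 reserved: 0xe0000000-0x100000000 region-size: 1048576
  //   ###
  //   ### type  address-range     used  prev-live  next-live      gc-eff
  //   ###                      (bytes)    (bytes)    (bytes)  (bytes/ms)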
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 (size_t)HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// Takes a pointer to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field is
// smaller than the region size), and updates the _hum_* field
// accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series from
// the _hum_* fields and updates those accordingly. It assumes that
// the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
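    // Worked example (hypothetical sizes): a humongous object filling
    // 2.5 regions is accounted entirely to this "starts humongous"
    // region, so we stash the totals in the _hum_* fields and let
    // get_hum_bytes() hand out at most GrainBytes per printed line:
    // this region and the first continuation report GrainBytes each,
    // and the last continuation reports the remaining half region.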
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}
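// For illustration (all values made up), the footer printed by the
// destructor above comes out roughly as:
//
//   ###
//   ### SUMMARY capacity: 256.00 MB used: 198.34 MB / 77.48 % prev-live: 120.00 MB / 46.88 % next-live: 115.00 MB / 44.92 %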